diff options
125 files changed, 4341 insertions, 3333 deletions
diff --git a/Makefile.objs b/Makefile.objs index 3c7abca433..20fb2c54f0 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -92,6 +92,8 @@ common-obj-$(CONFIG_SPICE) += spice-qemu-char.o common-obj-y += audio/ common-obj-y += hw/ +extra-obj-y += hw/ + common-obj-y += ui/ common-obj-y += bt-host.o bt-vhci.o diff --git a/arch_init.c b/arch_init.c index e307b23310..1645f3079a 100644 --- a/arch_init.c +++ b/arch_init.c @@ -568,7 +568,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; migration_bitmap = bitmap_new(ram_pages); - bitmap_set(migration_bitmap, 1, ram_pages); + bitmap_set(migration_bitmap, 0, ram_pages); migration_dirty_pages = ram_pages; bytes_transferred = 0; @@ -840,7 +840,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id) memset(host, ch, TARGET_PAGE_SIZE); #ifndef _WIN32 if (ch == 0 && - (!kvm_enabled() || kvm_has_sync_mmu())) { + (!kvm_enabled() || kvm_has_sync_mmu()) && + getpagesize() <= TARGET_PAGE_SIZE) { qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); } #endif @@ -215,8 +215,3 @@ void aio_context_unref(AioContext *ctx) { g_source_unref(&ctx->source); } - -void aio_flush(AioContext *ctx) -{ - while (aio_poll(ctx, true)); -} @@ -518,22 +518,16 @@ BlockDriver *bdrv_find_protocol(const char *filename) return NULL; } -static int find_image_format(const char *filename, BlockDriver **pdrv) +static int find_image_format(BlockDriverState *bs, const char *filename, + BlockDriver **pdrv) { - int ret, score, score_max; + int score, score_max; BlockDriver *drv1, *drv; uint8_t buf[2048]; - BlockDriverState *bs; - - ret = bdrv_file_open(&bs, filename, 0); - if (ret < 0) { - *pdrv = NULL; - return ret; - } + int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ if (bs->sg || !bdrv_is_inserted(bs)) { - bdrv_delete(bs); drv = bdrv_find_format("raw"); if (!drv) { ret = -ENOENT; @@ -543,7 +537,6 @@ static int find_image_format(const char *filename, BlockDriver **pdrv) } ret = bdrv_pread(bs, 0, buf, sizeof(buf)); - bdrv_delete(bs); if (ret < 0) { *pdrv = NULL; return ret; @@ -634,10 +627,31 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) bs->copy_on_read--; } +static int bdrv_open_flags(BlockDriverState *bs, int flags) +{ + int open_flags = flags | BDRV_O_CACHE_WB; + + /* + * Clear flags that are internal to the block layer before opening the + * image. + */ + open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + + /* + * Snapshots should be writable. + */ + if (bs->is_temporary) { + open_flags |= BDRV_O_RDWR; + } + + return open_flags; +} + /* * Common part for opening disk images and files */ -static int bdrv_open_common(BlockDriverState *bs, const char *filename, +static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, + const char *filename, int flags, BlockDriver *drv) { int ret, open_flags; @@ -665,31 +679,22 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, bs->opaque = g_malloc0(drv->instance_size); bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB); - open_flags = flags | BDRV_O_CACHE_WB; - - /* - * Clear flags that are internal to the block layer before opening the - * image. - */ - open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); - - /* - * Snapshots should be writable. - */ - if (bs->is_temporary) { - open_flags |= BDRV_O_RDWR; - } + open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); /* Open the image, either directly or using a protocol */ if (drv->bdrv_file_open) { - ret = drv->bdrv_file_open(bs, filename, open_flags); - } else { - ret = bdrv_file_open(&bs->file, filename, open_flags); - if (ret >= 0) { - ret = drv->bdrv_open(bs, open_flags); + if (file != NULL) { + bdrv_swap(file, bs); + ret = 0; + } else { + ret = drv->bdrv_file_open(bs, filename, open_flags); } + } else { + assert(file != NULL); + bs->file = file; + ret = drv->bdrv_open(bs, open_flags); } if (ret < 0) { @@ -709,10 +714,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename, return 0; free_and_fail: - if (bs->file) { - bdrv_delete(bs->file); - bs->file = NULL; - } + bs->file = NULL; g_free(bs->opaque); bs->opaque = NULL; bs->drv = NULL; @@ -734,7 +736,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags) } bs = bdrv_new(""); - ret = bdrv_open_common(bs, filename, flags, drv); + ret = bdrv_open_common(bs, NULL, filename, flags, drv); if (ret < 0) { bdrv_delete(bs); return ret; @@ -789,6 +791,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, int ret; /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char tmp_filename[PATH_MAX + 1]; + BlockDriverState *file = NULL; if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; @@ -848,25 +851,36 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, bs->is_temporary = 1; } + /* Open image file without format layer */ + if (flags & BDRV_O_RDWR) { + flags |= BDRV_O_ALLOW_RDWR; + } + + ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags)); + if (ret < 0) { + return ret; + } + /* Find the right image format driver */ if (!drv) { - ret = find_image_format(filename, &drv); + ret = find_image_format(file, filename, &drv); } if (!drv) { goto unlink_and_fail; } - if (flags & BDRV_O_RDWR) { - flags |= BDRV_O_ALLOW_RDWR; - } - /* Open the image */ - ret = bdrv_open_common(bs, filename, flags, drv); + ret = bdrv_open_common(bs, file, filename, flags, drv); if (ret < 0) { goto unlink_and_fail; } + if (bs->file != file) { + bdrv_delete(file); + file = NULL; + } + /* If there is a backing file, use it */ if ((flags & BDRV_O_NO_BACKING) == 0) { ret = bdrv_open_backing_file(bs); @@ -888,6 +902,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, return 0; unlink_and_fail: + if (file != NULL) { + bdrv_delete(file); + } if (bs->is_temporary) { unlink(filename); } @@ -3028,7 +3045,46 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event) } drv->bdrv_debug_event(bs, event); +} + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { + return bs->drv->bdrv_debug_breakpoint(bs, event, tag); + } + return -ENOTSUP; +} + +int bdrv_debug_resume(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_resume) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_resume) { + return bs->drv->bdrv_debug_resume(bs, tag); + } + + return -ENOTSUP; +} + +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { + bs = bs->file; + } + + if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { + return bs->drv->bdrv_debug_is_suspended(bs, tag); + } + + return false; } /**************************************************************/ @@ -3778,12 +3834,20 @@ typedef struct BlockDriverAIOCBCoroutine { BlockDriverAIOCB common; BlockRequest req; bool is_write; + bool *done; QEMUBH* bh; } BlockDriverAIOCBCoroutine; static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { - qemu_aio_flush(); + BlockDriverAIOCBCoroutine *acb = + container_of(blockacb, BlockDriverAIOCBCoroutine, common); + bool done = false; + + acb->done = &done; + while (!done) { + qemu_aio_wait(); + } } static const AIOCBInfo bdrv_em_co_aiocb_info = { @@ -3796,6 +3860,11 @@ static void bdrv_co_em_bh(void *opaque) BlockDriverAIOCBCoroutine *acb = opaque; acb->common.cb(acb->common.opaque, acb->req.error); + + if (acb->done) { + *acb->done = true; + } + qemu_bh_delete(acb->bh); qemu_aio_release(acb); } @@ -3834,6 +3903,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, acb->req.nb_sectors = nb_sectors; acb->req.qiov = qiov; acb->is_write = is_write; + acb->done = NULL; co = qemu_coroutine_create(bdrv_co_do_rw); qemu_coroutine_enter(co, acb); @@ -3860,6 +3930,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs, BlockDriverAIOCBCoroutine *acb; acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); + acb->done = NULL; + co = qemu_coroutine_create(bdrv_aio_flush_co_entry); qemu_coroutine_enter(co, acb); @@ -3888,6 +3960,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs, acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); acb->req.sector = sector_num; acb->req.nb_sectors = nb_sectors; + acb->done = NULL; co = qemu_coroutine_create(bdrv_aio_discard_co_entry); qemu_coroutine_enter(co, acb); @@ -4408,9 +4481,9 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie) bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns; } -int bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags) +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, Error **errp) { QEMUOptionParameter *param = NULL, *create_options = NULL; QEMUOptionParameter *backing_fmt, *backing_file, *size; @@ -4422,16 +4495,14 @@ int bdrv_img_create(const char *filename, const char *fmt, /* Find driver and parse its options */ drv = bdrv_find_format(fmt); if (!drv) { - error_report("Unknown file format '%s'", fmt); - ret = -EINVAL; - goto out; + error_setg(errp, "Unknown file format '%s'", fmt); + return; } proto_drv = bdrv_find_protocol(filename); if (!proto_drv) { - error_report("Unknown protocol '%s'", filename); - ret = -EINVAL; - goto out; + error_setg(errp, "Unknown protocol '%s'", filename); + return; } create_options = append_option_parameters(create_options, @@ -4448,8 +4519,7 @@ int bdrv_img_create(const char *filename, const char *fmt, if (options) { param = parse_option_parameters(options, create_options, param); if (param == NULL) { - error_report("Invalid options for file format '%s'.", fmt); - ret = -EINVAL; + error_setg(errp, "Invalid options for file format '%s'.", fmt); goto out; } } @@ -4457,18 +4527,16 @@ int bdrv_img_create(const char *filename, const char *fmt, if (base_filename) { if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, base_filename)) { - error_report("Backing file not supported for file format '%s'", - fmt); - ret = -EINVAL; + error_setg(errp, "Backing file not supported for file format '%s'", + fmt); goto out; } } if (base_fmt) { if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { - error_report("Backing file format not supported for file " - "format '%s'", fmt); - ret = -EINVAL; + error_setg(errp, "Backing file format not supported for file " + "format '%s'", fmt); goto out; } } @@ -4476,9 +4544,8 @@ int bdrv_img_create(const char *filename, const char *fmt, backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); if (backing_file && backing_file->value.s) { if (!strcmp(filename, backing_file->value.s)) { - error_report("Error: Trying to create an image with the " - "same filename as the backing file"); - ret = -EINVAL; + error_setg(errp, "Error: Trying to create an image with the " + "same filename as the backing file"); goto out; } } @@ -4487,9 +4554,8 @@ int bdrv_img_create(const char *filename, const char *fmt, if (backing_fmt && backing_fmt->value.s) { backing_drv = bdrv_find_format(backing_fmt->value.s); if (!backing_drv) { - error_report("Unknown backing file format '%s'", - backing_fmt->value.s); - ret = -EINVAL; + error_setg(errp, "Unknown backing file format '%s'", + backing_fmt->value.s); goto out; } } @@ -4511,7 +4577,8 @@ int bdrv_img_create(const char *filename, const char *fmt, ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv); if (ret < 0) { - error_report("Could not open '%s'", backing_file->value.s); + error_setg_errno(errp, -ret, "Could not open '%s'", + backing_file->value.s); goto out; } bdrv_get_geometry(bs, &size); @@ -4520,8 +4587,7 @@ int bdrv_img_create(const char *filename, const char *fmt, snprintf(buf, sizeof(buf), "%" PRId64, size); set_option_parameter(param, BLOCK_OPT_SIZE, buf); } else { - error_report("Image creation needs a size parameter"); - ret = -EINVAL; + error_setg(errp, "Image creation needs a size parameter"); goto out; } } @@ -4531,17 +4597,16 @@ int bdrv_img_create(const char *filename, const char *fmt, puts(""); ret = bdrv_create(drv, filename, param); - if (ret < 0) { if (ret == -ENOTSUP) { - error_report("Formatting or formatting option not supported for " - "file format '%s'", fmt); + error_setg(errp,"Formatting or formatting option not supported for " + "file format '%s'", fmt); } else if (ret == -EFBIG) { - error_report("The image size is too large for file format '%s'", - fmt); + error_setg(errp, "The image size is too large for file format '%s'", + fmt); } else { - error_report("%s: error while creating %s: %s", filename, fmt, - strerror(-ret)); + error_setg(errp, "%s: error while creating %s: %s", filename, fmt, + strerror(-ret)); } } @@ -4552,6 +4617,4 @@ out: if (bs) { bdrv_delete(bs); } - - return ret; } @@ -343,9 +343,9 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size); -int bdrv_img_create(const char *filename, const char *fmt, - const char *base_filename, const char *base_fmt, - char *options, uint64_t img_size, int flags); +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, Error **errp); void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); @@ -431,4 +431,9 @@ typedef enum { #define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt) void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + #endif diff --git a/block/blkdebug.c b/block/blkdebug.c index d61ece86a9..294e983306 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -29,8 +29,10 @@ typedef struct BDRVBlkdebugState { int state; int new_state; + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; + QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; } BDRVBlkdebugState; typedef struct BlkdebugAIOCB { @@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB { int ret; } BlkdebugAIOCB; +typedef struct BlkdebugSuspendedReq { + Coroutine *co; + char *tag; + QLIST_ENTRY(BlkdebugSuspendedReq) next; +} BlkdebugSuspendedReq; + static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb); static const AIOCBInfo blkdebug_aiocb_info = { @@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = { enum { ACTION_INJECT_ERROR, ACTION_SET_STATE, + ACTION_SUSPEND, }; typedef struct BlkdebugRule { @@ -65,6 +74,9 @@ typedef struct BlkdebugRule { struct { int new_state; } set_state; + struct { + char *tag; + } suspend; } options; QLIST_ENTRY(BlkdebugRule) next; QSIMPLEQ_ENTRY(BlkdebugRule) active_next; @@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque) rule->options.set_state.new_state = qemu_opt_get_number(opts, "new_state", 0); break; + + case ACTION_SUSPEND: + rule->options.suspend.tag = + g_strdup(qemu_opt_get(opts, "tag")); + break; }; /* Add the rule */ @@ -234,12 +251,32 @@ static int add_rule(QemuOpts *opts, void *opaque) return 0; } +static void remove_rule(BlkdebugRule *rule) +{ + switch (rule->action) { + case ACTION_INJECT_ERROR: + case ACTION_SET_STATE: + break; + case ACTION_SUSPEND: + g_free(rule->options.suspend.tag); + break; + } + + QLIST_REMOVE(rule, next); + g_free(rule); +} + static int read_config(BDRVBlkdebugState *s, const char *filename) { FILE *f; int ret; struct add_rule_data d; + /* Allow usage without config file */ + if (!*filename) { + return 0; + } + f = fopen(filename, "r"); if (f == NULL) { return -errno; @@ -389,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs, return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque); } + static void blkdebug_close(BlockDriverState *bs) { BDRVBlkdebugState *s = bs->opaque; @@ -397,12 +435,32 @@ static void blkdebug_close(BlockDriverState *bs) for (i = 0; i < BLKDBG_EVENT_MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { - QLIST_REMOVE(rule, next); - g_free(rule); + remove_rule(rule); } } } +static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq r; + + r = (BlkdebugSuspendedReq) { + .co = qemu_coroutine_self(), + .tag = g_strdup(rule->options.suspend.tag), + }; + + remove_rule(rule); + QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); + + printf("blkdebug: Suspended request '%s'\n", r.tag); + qemu_coroutine_yield(); + printf("blkdebug: Resuming request '%s'\n", r.tag); + + QLIST_REMOVE(&r, next); + g_free(r.tag); +} + static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, bool injected) { @@ -426,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, case ACTION_SET_STATE: s->new_state = rule->options.set_state.new_state; break; + + case ACTION_SUSPEND: + suspend_request(bs, rule); + break; } return injected; } @@ -433,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; - struct BlkdebugRule *rule; + struct BlkdebugRule *rule, *next; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; s->new_state = s->state; - QLIST_FOREACH(rule, &s->rules[event], next) { + QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) { injected = process_rule(bs, rule, injected); } s->state = s->new_state; } +static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + struct BlkdebugRule *rule; + BlkDebugEvent blkdebug_event; + + if (get_event_by_name(event, &blkdebug_event) < 0) { + return -ENOENT; + } + + + rule = g_malloc(sizeof(*rule)); + *rule = (struct BlkdebugRule) { + .event = blkdebug_event, + .action = ACTION_SUSPEND, + .state = 0, + .options.suspend.tag = g_strdup(tag), + }; + + QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next); + + return 0; +} + +static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + qemu_coroutine_enter(r->co, NULL); + return 0; + } + } + return -ENOENT; +} + + +static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) +{ + BDRVBlkdebugState *s = bs->opaque; + BlkdebugSuspendedReq *r; + + QLIST_FOREACH(r, &s->suspended_reqs, next) { + if (!strcmp(r->tag, tag)) { + return true; + } + } + return false; +} + static int64_t blkdebug_getlength(BlockDriverState *bs) { return bdrv_getlength(bs->file); @@ -464,7 +579,10 @@ static BlockDriver bdrv_blkdebug = { .bdrv_aio_readv = blkdebug_aio_readv, .bdrv_aio_writev = blkdebug_aio_writev, - .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_event = blkdebug_debug_event, + .bdrv_debug_breakpoint = blkdebug_debug_breakpoint, + .bdrv_debug_resume = blkdebug_debug_resume, + .bdrv_debug_is_suspended = blkdebug_debug_is_suspended, }; static void bdrv_blkdebug_init(void) diff --git a/block/commit.c b/block/commit.c index fae79582d4..e2bb1e241b 100644 --- a/block/commit.c +++ b/block/commit.c @@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/mirror.c b/block/mirror.c index d6618a4b34..b1f5d4fa22 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque) } /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211c57..468ef1be56 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -615,57 +615,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) { BDRVQcowState *s = bs->opaque; - int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, *l2_table; - uint64_t cluster_offset = m->alloc_offset; - bool cow = false; - - trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + int ret; - if (m->nb_clusters == 0) + if (r->nb_sectors == 0) { return 0; + } - old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + qemu_co_mutex_unlock(&s->lock); + ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, + r->offset / BDRV_SECTOR_SIZE, + r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + qemu_co_mutex_lock(&s->lock); - /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; - if (m->n_start) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; - } - - if (m->nb_available & (s->cluster_sectors - 1)) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available, - align_offset(m->nb_available, s->cluster_sectors)); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + if (ret < 0) { + return ret; } /* - * Update L2 table. - * * Before we update the L2 table to actually point to the new cluster, we * need to be sure that the refcounts have been increased and COW was * handled. */ - if (cow) { - qcow2_cache_depends_on_flush(s->l2_table_cache); + qcow2_cache_depends_on_flush(s->l2_table_cache); + + return 0; +} + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, *l2_table; + uint64_t cluster_offset = m->alloc_offset; + + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + assert(m->nb_clusters > 0); + + old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + + /* copy content of unmodified sectors */ + ret = perform_cow(bs, m, &m->cow_start); + if (ret < 0) { + goto err; + } + + ret = perform_cow(bs, m, &m->cow_end); + if (ret < 0) { + goto err; } + /* Update L2 table. */ + if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) { + qcow2_mark_dirty(bs); + } if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; @@ -743,38 +753,16 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. - * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. - * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + unsigned int *nb_clusters) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; - trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - - /* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { uint64_t start = guest_offset >> s->cluster_bits; @@ -807,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, abort(); } + return 0; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + int ret; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + + ret = handle_dependencies(bs, guest_offset, nb_clusters); + if (ret < 0) { + return ret; + } + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -818,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, *host_offset = cluster_offset; return 0; } else { - int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); + ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters); if (ret < 0) { return ret; } @@ -847,7 +871,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; int l2_index, ret, sectors; @@ -919,12 +943,6 @@ again: } /* If there is something left to allocate, do that now */ - *m = (QCowL2Meta) { - .cluster_offset = cluster_offset, - .nb_clusters = 0, - }; - qemu_co_queue_init(&m->dependent_requests); - if (nb_clusters > 0) { uint64_t alloc_offset; uint64_t alloc_cluster_offset; @@ -957,22 +975,40 @@ again: * * avail_sectors: Number of sectors from the start of the first * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the aread that the write + * request actually writes to (excluding COW at the end) */ int requested_sectors = n_end - keep_clusters * s->cluster_sectors; int avail_sectors = nb_clusters << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = keep_clusters == 0 ? n_start : 0; + int nb_sectors = MIN(requested_sectors, avail_sectors); + + if (keep_clusters == 0) { + cluster_offset = alloc_cluster_offset; + } + + *m = g_malloc0(sizeof(**m)); - *m = (QCowL2Meta) { - .cluster_offset = keep_clusters == 0 ? - alloc_cluster_offset : cluster_offset, + **m = (QCowL2Meta) { .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset, - .n_start = keep_clusters == 0 ? n_start : 0, + .offset = alloc_offset & ~(s->cluster_size - 1), .nb_clusters = nb_clusters, - .nb_available = MIN(requested_sectors, avail_sectors), + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, }; - qemu_co_queue_init(&m->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); } } @@ -984,12 +1020,13 @@ again: assert(sectors > n_start); *num = sectors - n_start; + *host_offset = cluster_offset; return 0; fail: - if (m->nb_clusters > 0) { - QLIST_REMOVE(m, next_in_flight); + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); } return ret; } diff --git a/block/qcow2.c b/block/qcow2.c index c1ff31f482..8520bda21a 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs, * updated successfully. Therefore it is not required to check the return * value of this function. */ -static int qcow2_mark_dirty(BlockDriverState *bs) +int qcow2_mark_dirty(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; uint64_t val; @@ -745,21 +745,6 @@ fail: return ret; } -static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m) -{ - /* Take the request off the list of running requests */ - if (m->nb_clusters != 0) { - QLIST_REMOVE(m, next_in_flight); - } - - /* Restart all dependent requests */ - if (!qemu_co_queue_empty(&m->dependent_requests)) { - qemu_co_mutex_unlock(&s->lock); - qemu_co_queue_restart_all(&m->dependent_requests); - qemu_co_mutex_lock(&s->lock); - } -} - static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, @@ -774,15 +759,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; - QCowL2Meta l2meta = { - .nb_clusters = 0, - }; + QCowL2Meta *l2meta; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); - qemu_co_queue_init(&l2meta.dependent_requests); - qemu_iovec_init(&hd_qiov, qiov->niov); s->cluster_cache_offset = -1; /* disable compressed cache */ @@ -791,6 +772,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, while (remaining_sectors != 0) { + l2meta = NULL; + trace_qcow2_writev_start_part(qemu_coroutine_self()); index_in_cluster = sector_num & (s->cluster_sectors - 1); n_end = index_in_cluster + remaining_sectors; @@ -800,17 +783,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, } ret = qcow2_alloc_cluster_offset(bs, sector_num << 9, - index_in_cluster, n_end, &cur_nr_sectors, &l2meta); + index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta); if (ret < 0) { goto fail; } - if (l2meta.nb_clusters > 0 && - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) { - qcow2_mark_dirty(bs); - } - - cluster_offset = l2meta.cluster_offset; assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); @@ -835,8 +812,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, cur_nr_sectors * 512); } - BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); qemu_co_mutex_unlock(&s->lock); + BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); ret = bdrv_co_writev(bs->file, @@ -847,12 +824,24 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, goto fail; } - ret = qcow2_alloc_cluster_link_l2(bs, &l2meta); - if (ret < 0) { - goto fail; - } + if (l2meta != NULL) { + ret = qcow2_alloc_cluster_link_l2(bs, l2meta); + if (ret < 0) { + goto fail; + } + + /* Take the request off the list of running requests */ + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + + qemu_co_mutex_unlock(&s->lock); + qemu_co_queue_restart_all(&l2meta->dependent_requests); + qemu_co_mutex_lock(&s->lock); - run_dependent_requests(s, &l2meta); + g_free(l2meta); + l2meta = NULL; + } remaining_sectors -= cur_nr_sectors; sector_num += cur_nr_sectors; @@ -862,10 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, ret = 0; fail: - run_dependent_requests(s, &l2meta); - qemu_co_mutex_unlock(&s->lock); + if (l2meta != NULL) { + if (l2meta->nb_clusters != 0) { + QLIST_REMOVE(l2meta, next_in_flight); + } + qemu_co_queue_restart_all(&l2meta->dependent_requests); + g_free(l2meta); + } + qemu_iovec_destroy(&hd_qiov); qemu_vfree(cluster_data); trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); @@ -1128,31 +1123,33 @@ static int preallocate(BlockDriverState *bs) { uint64_t nb_sectors; uint64_t offset; + uint64_t host_offset = 0; int num; int ret; - QCowL2Meta meta; + QCowL2Meta *meta; nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; - qemu_co_queue_init(&meta.dependent_requests); - meta.cluster_offset = 0; while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); - ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); + ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, + &host_offset, &meta); if (ret < 0) { return ret; } - ret = qcow2_alloc_cluster_link_l2(bs, &meta); + ret = qcow2_alloc_cluster_link_l2(bs, meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); + qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters); return ret; } /* There are no dependent requests, but we need to remove our request * from the list of in-flight requests */ - run_dependent_requests(bs->opaque, &meta); + if (meta != NULL) { + QLIST_REMOVE(meta, next_in_flight); + } /* TODO Preallocate data if requested */ @@ -1165,10 +1162,10 @@ static int preallocate(BlockDriverState *bs) * all of the allocated clusters (otherwise we get failing reads after * EOF). Extend the image to the last allocated sector. */ - if (meta.cluster_offset != 0) { + if (host_offset != 0) { uint8_t buf[512]; memset(buf, 0, 512); - ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1); + ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1); if (ret < 0) { return ret; } diff --git a/block/qcow2.h b/block/qcow2.h index b4eb65470e..a60fcb429a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -196,17 +196,56 @@ typedef struct QCowCreateState { struct QCowAIOCB; -/* XXX This could be private for qcow2-cluster.c */ +typedef struct Qcow2COWRegion { + /** + * Offset of the COW region in bytes from the start of the first cluster + * touched by the request. + */ + uint64_t offset; + + /** Number of sectors to copy */ + int nb_sectors; +} Qcow2COWRegion; + +/** + * Describes an in-flight (part of a) write request that writes to clusters + * that are not referenced in their L2 table yet. + */ typedef struct QCowL2Meta { + /** Guest offset of the first newly allocated cluster */ uint64_t offset; - uint64_t cluster_offset; + + /** Host offset of the first newly allocated cluster */ uint64_t alloc_offset; - int n_start; + + /** + * Number of sectors from the start of the first allocated cluster to + * the end of the (possibly shortened) request + */ int nb_available; + + /** Number of newly allocated clusters */ int nb_clusters; + + /** + * Requests that overlap with this allocation and wait to be restarted + * when the allocating request has completed. + */ CoQueue dependent_requests; + /** + * The COW Region between the start of the first allocated cluster and the + * area the guest actually writes to. + */ + Qcow2COWRegion cow_start; + + /** + * The COW Region between the area the guest actually writes to and the + * end of the last allocated cluster. + */ + Qcow2COWRegion cow_end; + QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; @@ -264,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) /* qcow2.c functions */ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int64_t sector_num, int nb_sectors); + +int qcow2_mark_dirty(BlockDriverState *bs); int qcow2_update_header(BlockDriverState *bs); /* qcow2-refcount.c functions */ @@ -297,7 +338,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset); int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m); + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m); uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size); diff --git a/block/raw-posix.c b/block/raw-posix.c index 550c81f22b..abfedbea73 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } -static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, - unsigned long int req, void *buf, - BlockDriverCompletionFunc *cb, void *opaque) -{ - RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); - - acb->bs = bs; - acb->aio_type = QEMU_AIO_IOCTL; - acb->aio_fildes = fd; - acb->aio_offset = 0; - acb->aio_ioctl_buf = buf; - acb->aio_ioctl_cmd = req; - - return thread_pool_submit_aio(aio_worker, acb, cb, opaque); -} - static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int type) @@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { BDRVRawState *s = bs->opaque; + RawPosixAIOData *acb; if (fd_open(bs) < 0) return NULL; - return paio_ioctl(bs, s->fd, req, buf, cb, opaque); + + acb = g_slice_new(RawPosixAIOData); + acb->bs = bs; + acb->aio_type = QEMU_AIO_IOCTL; + acb->aio_fildes = s->fd; + acb->aio_offset = 0; + acb->aio_ioctl_buf = buf; + acb->aio_ioctl_cmd = req; + return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) diff --git a/block/raw-win32.c b/block/raw-win32.c index 0c05c58c5a..ce207a3109 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset) { BDRVRawState *s = bs->opaque; LONG low, high; + DWORD dwPtrLow; low = offset; high = offset >> 32; - if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN)) - return -EIO; - if (!SetEndOfFile(s->hfile)) + + /* + * An error has occurred if the return value is INVALID_SET_FILE_POINTER + * and GetLastError doesn't return NO_ERROR. + */ + dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN); + if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) { + fprintf(stderr, "SetFilePointer error: %d\n", GetLastError()); + return -EIO; + } + if (SetEndOfFile(s->hfile) == 0) { + fprintf(stderr, "SetEndOfFile error: %d\n", GetLastError()); return -EIO; + } return 0; } diff --git a/block/rbd.c b/block/rbd.c index f3becc7a8b..737bab16cc 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -77,6 +77,7 @@ typedef struct RBDAIOCB { int error; struct BDRVRBDState *s; int cancelled; + int status; } RBDAIOCB; typedef struct RADOSCB { @@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) RBDAIOCB *acb = rcb->acb; int64_t r; - if (acb->cancelled) { - qemu_vfree(acb->bounce); - qemu_aio_release(acb); - goto done; - } - r = rcb->ret; if (acb->cmd == RBD_AIO_WRITE || @@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) /* Note that acb->bh can be NULL in case where the aio was cancelled */ acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); qemu_bh_schedule(acb->bh); -done: g_free(rcb); } @@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb) { RBDAIOCB *acb = (RBDAIOCB *) blockacb; acb->cancelled = 1; + + while (acb->status == -EINPROGRESS) { + qemu_aio_wait(); + } + + qemu_aio_release(acb); } static const AIOCBInfo rbd_aiocb_info = { @@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque) acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); qemu_bh_delete(acb->bh); acb->bh = NULL; + acb->status = 0; - qemu_aio_release(acb); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } static int rbd_aio_discard_wrapper(rbd_image_t image, @@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->s = s; acb->cancelled = 0; acb->bh = NULL; + acb->status = -EINPROGRESS; if (cmd == RBD_AIO_WRITE) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); diff --git a/block/stream.c b/block/stream.c index 0c0fc7a13b..0dcd286035 100644 --- a/block/stream.c +++ b/block/stream.c @@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque) wait: /* Note that even when no rate limit is applied we need to yield - * with no pending I/O here so that qemu_aio_flush() returns. + * with no pending I/O here so that bdrv_drain_all() returns. */ block_job_sleep_ns(&s->common, rt_clock, delay_ns); if (block_job_is_cancelled(&s->common)) { diff --git a/block/vpc.c b/block/vpc.c index b6bf52f140..566e9a3b37 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -26,6 +26,9 @@ #include "block_int.h" #include "module.h" #include "migration.h" +#if defined(CONFIG_UUID) +#include <uuid/uuid.h> +#endif /**************************************************************/ @@ -198,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags) bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - if (bs->total_sectors >= 65535 * 16 * 255) { + /* Allow a maximum disk size of approximately 2 TB */ + if (bs->total_sectors >= 65535LL * 255 * 255) { err = -EFBIG; goto fail; } @@ -524,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, * Note that the geometry doesn't always exactly match total_sectors but * may round it down. * - * Returns 0 on success, -EFBIG if the size is larger than 127 GB + * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override + * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) + * and instead allow up to 255 heads. */ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, uint8_t* heads, uint8_t* secs_per_cyl) { uint32_t cyls_times_heads; - if (total_sectors > 65535 * 16 * 255) + /* Allow a maximum disk size of approximately 2 TB */ + if (total_sectors > 65535LL * 255 * 255) { return -EFBIG; + } if (total_sectors > 65535 * 16 * 63) { *secs_per_cyl = 255; - *heads = 16; + if (total_sectors > 65535 * 16 * 255) { + *heads = 255; + } else { + *heads = 16; + } cyls_times_heads = total_sectors / *secs_per_cyl; } else { *secs_per_cyl = 17; @@ -739,7 +751,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) footer->type = be32_to_cpu(disk_type); - /* TODO uuid is missing */ +#if defined(CONFIG_UUID) + uuid_generate(footer->uuid); +#endif footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE)); diff --git a/block_int.h b/block_int.h index 9deedb811a..bf3f79b3db 100644 --- a/block_int.h +++ b/block_int.h @@ -190,6 +190,12 @@ struct BlockDriver { void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + /* * Returns 1 if newly created images are guaranteed to contain only * zeros, 0 otherwise. diff --git a/blockdev.c b/blockdev.c index e73fd6e388..9a05e57009 100644 --- a/blockdev.c +++ b/blockdev.c @@ -275,7 +275,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits) return true; } -DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) +DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type) { const char *buf; const char *file = NULL; @@ -325,7 +325,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } } else { - type = default_to_scsi ? IF_SCSI : IF_IDE; + type = block_default_type; } max_devs = if_max_devs[type]; @@ -568,7 +568,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) break; case IF_VIRTIO: /* add virtio block device */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); if (arch_type == QEMU_ARCH_S390X) { qemu_opt_set(opts, "driver", "virtio-blk-s390"); } else { @@ -707,6 +707,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) int ret = 0; BlockdevActionList *dev_entry = dev_list; BlkTransactionStates *states, *next; + Error *local_err = NULL; QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); @@ -786,12 +787,12 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) /* create new image w/backing file */ if (mode != NEW_IMAGE_MODE_EXISTING) { - ret = bdrv_img_create(new_image_file, format, - states->old_bs->filename, - states->old_bs->drv->format_name, - NULL, -1, flags); - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); + bdrv_img_create(new_image_file, format, + states->old_bs->filename, + states->old_bs->drv->format_name, + NULL, -1, flags, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); goto delete_and_fail; } } @@ -1263,8 +1264,8 @@ void qmp_drive_mirror(const char *device, const char *target, assert(format && drv); bdrv_get_geometry(bs, &size); size *= 512; - ret = bdrv_img_create(target, format, - NULL, NULL, NULL, size, flags); + bdrv_img_create(target, format, + NULL, NULL, NULL, size, flags, &local_err); } else { switch (mode) { case NEW_IMAGE_MODE_EXISTING: @@ -1272,18 +1273,18 @@ void qmp_drive_mirror(const char *device, const char *target, break; case NEW_IMAGE_MODE_ABSOLUTE_PATHS: /* create new image with backing file */ - ret = bdrv_img_create(target, format, - source->filename, - source->drv->format_name, - NULL, -1, flags); + bdrv_img_create(target, format, + source->filename, + source->drv->format_name, + NULL, -1, flags, &local_err); break; default: abort(); } } - if (ret) { - error_set(errp, QERR_OPEN_FILE_FAILED, target); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); return; } diff --git a/blockdev.h b/blockdev.h index 5f27b643be..d73d552a98 100644 --- a/blockdev.h +++ b/blockdev.h @@ -19,8 +19,13 @@ void blockdev_auto_del(BlockDriverState *bs); typedef enum { IF_DEFAULT = -1, /* for use with drive_add() only */ + /* + * IF_IDE must be zero, because we want QEMUMachine member + * block_default_type to default-initialize to IF_IDE + */ + IF_IDE = 0, IF_NONE, - IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, + IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN, IF_COUNT } BlockInterfaceType; @@ -51,7 +56,7 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs); QemuOpts *drive_def(const char *optstr); QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file, const char *optstr); -DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi); +DriveInfo *drive_init(QemuOpts *arg, BlockInterfaceType block_default_type); /* device-hotplug */ @@ -2127,7 +2127,7 @@ fi # pixman support probe if test "$pixman" = ""; then - if $pkg_config --atleast-version=0.18.4 pixman-1 > /dev/null 2>&1; then + if $pkg_config pixman-1 > /dev/null 2>&1; then pixman="system" else pixman="internal" @@ -2138,7 +2138,7 @@ if test "$pixman" = "system"; then pixman_libs=`$pkg_config --libs pixman-1 2>/dev/null` else if test ! -d ${source_path}/pixman/pixman; then - echo "ERROR: pixman not present (or older than 0.18.4). Your options:" + echo "ERROR: pixman not present. Your options:" echo " (1) Preferred: Install the pixman devel package (any recent" echo " distro should have packages as Xorg needs pixman too)." echo " (2) Fetch the pixman submodule, using:" @@ -229,6 +229,16 @@ static inline void unregister_displaychangelistener(DisplayState *ds, static inline void dpy_gfx_update(DisplayState *s, int x, int y, int w, int h) { struct DisplayChangeListener *dcl; + int width = pixman_image_get_width(s->surface->image); + int height = pixman_image_get_height(s->surface->image); + + x = MAX(x, 0); + y = MAX(y, 0); + x = MIN(x, width); + y = MIN(y, height); + w = MIN(w, width - x); + h = MIN(h, height - y); + QLIST_FOREACH(dcl, &s->listeners, next) { if (dcl->dpy_gfx_update) { dcl->dpy_gfx_update(s, x, y, w, h); diff --git a/exec-all.h b/exec-all.h index b18d4ca534..e9b07cd986 100644 --- a/exec-all.h +++ b/exec-all.h @@ -80,8 +80,8 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb, void cpu_gen_init(void); int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb, int *gen_code_size_ptr); -int cpu_restore_state(struct TranslationBlock *tb, - CPUArchState *env, uintptr_t searched_pc); +bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc); + void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc); void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr); TranslationBlock *tb_gen_code(CPUArchState *env, @@ -275,8 +275,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n, } } -TranslationBlock *tb_find_pc(uintptr_t pc_ptr); - #include "qemu-lock.h" extern spinlock_t tb_lock; @@ -1,5 +1,5 @@ /* - * virtual page mapping and translated block handling + * Virtual page mapping * * Copyright (c) 2003 Fabrice Bellard * @@ -38,62 +38,20 @@ #include "exec-memory.h" #if defined(CONFIG_USER_ONLY) #include <qemu.h> -#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) -#include <sys/param.h> -#if __FreeBSD_version >= 700104 -#define HAVE_KINFO_GETVMMAP -#define sigqueue sigqueue_freebsd /* avoid redefinition */ -#include <sys/time.h> -#include <sys/proc.h> -#include <machine/profile.h> -#define _KERNEL -#include <sys/user.h> -#undef _KERNEL -#undef sigqueue -#include <libutil.h> -#endif -#endif #else /* !CONFIG_USER_ONLY */ #include "xen-mapcache.h" #include "trace.h" #endif #include "cputlb.h" +#include "translate-all.h" #include "memory-internal.h" -//#define DEBUG_TB_INVALIDATE -//#define DEBUG_FLUSH //#define DEBUG_UNASSIGNED - -/* make various TB consistency checks */ -//#define DEBUG_TB_CHECK - -//#define DEBUG_IOPORT //#define DEBUG_SUBPAGE #if !defined(CONFIG_USER_ONLY) -/* TB consistency checks only implemented for usermode emulation. */ -#undef DEBUG_TB_CHECK -#endif - -#define SMC_BITMAP_USE_THRESHOLD 10 - -static TranslationBlock *tbs; -static int code_gen_max_blocks; -TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; -static int nb_tbs; -/* any access to the tbs or the page table must use this lock */ -spinlock_t tb_lock = SPIN_LOCK_UNLOCKED; - -uint8_t *code_gen_prologue; -static uint8_t *code_gen_buffer; -static size_t code_gen_buffer_size; -/* threshold to flush the translated code buffer */ -static size_t code_gen_buffer_max_size; -static uint8_t *code_gen_ptr; - -#if !defined(CONFIG_USER_ONLY) int phys_ram_fd; static int in_migration; @@ -120,59 +78,6 @@ DEFINE_TLS(CPUArchState *,cpu_single_env); 2 = Adaptive rate instruction counting. */ int use_icount = 0; -typedef struct PageDesc { - /* list of TBs intersecting this ram page */ - TranslationBlock *first_tb; - /* in order to optimize self modifying code, we count the number - of lookups we do to a given page to use a bitmap */ - unsigned int code_write_count; - uint8_t *code_bitmap; -#if defined(CONFIG_USER_ONLY) - unsigned long flags; -#endif -} PageDesc; - -/* In system mode we want L1_MAP to be based on ram offsets, - while in user mode we want it to be based on virtual addresses. */ -#if !defined(CONFIG_USER_ONLY) -#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS -# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS -#else -# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS -#endif -#else -# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS -#endif - -/* Size of the L2 (and L3, etc) page tables. */ -#define L2_BITS 10 -#define L2_SIZE (1 << L2_BITS) - -#define P_L2_LEVELS \ - (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1) - -/* The bits remaining after N lower levels of page tables. */ -#define V_L1_BITS_REM \ - ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS) - -#if V_L1_BITS_REM < 4 -#define V_L1_BITS (V_L1_BITS_REM + L2_BITS) -#else -#define V_L1_BITS V_L1_BITS_REM -#endif - -#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS) - -#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) - -uintptr_t qemu_real_host_page_size; -uintptr_t qemu_host_page_size; -uintptr_t qemu_host_page_mask; - -/* This is a multi-level map on the virtual address space. - The bottom level has pointers to PageDesc. */ -static void *l1_map[V_L1_SIZE]; - #if !defined(CONFIG_USER_ONLY) static MemoryRegionSection *phys_sections; @@ -194,179 +99,6 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr); static MemoryRegion io_mem_watch; #endif -static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, - tb_page_addr_t phys_page2); - -/* statistics */ -static int tb_flush_count; -static int tb_phys_invalidate_count; - -#ifdef _WIN32 -static inline void map_exec(void *addr, long size) -{ - DWORD old_protect; - VirtualProtect(addr, size, - PAGE_EXECUTE_READWRITE, &old_protect); - -} -#else -static inline void map_exec(void *addr, long size) -{ - unsigned long start, end, page_size; - - page_size = getpagesize(); - start = (unsigned long)addr; - start &= ~(page_size - 1); - - end = (unsigned long)addr + size; - end += page_size - 1; - end &= ~(page_size - 1); - - mprotect((void *)start, end - start, - PROT_READ | PROT_WRITE | PROT_EXEC); -} -#endif - -static void page_init(void) -{ - /* NOTE: we can always suppose that qemu_host_page_size >= - TARGET_PAGE_SIZE */ -#ifdef _WIN32 - { - SYSTEM_INFO system_info; - - GetSystemInfo(&system_info); - qemu_real_host_page_size = system_info.dwPageSize; - } -#else - qemu_real_host_page_size = getpagesize(); -#endif - if (qemu_host_page_size == 0) - qemu_host_page_size = qemu_real_host_page_size; - if (qemu_host_page_size < TARGET_PAGE_SIZE) - qemu_host_page_size = TARGET_PAGE_SIZE; - qemu_host_page_mask = ~(qemu_host_page_size - 1); - -#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) - { -#ifdef HAVE_KINFO_GETVMMAP - struct kinfo_vmentry *freep; - int i, cnt; - - freep = kinfo_getvmmap(getpid(), &cnt); - if (freep) { - mmap_lock(); - for (i = 0; i < cnt; i++) { - unsigned long startaddr, endaddr; - - startaddr = freep[i].kve_start; - endaddr = freep[i].kve_end; - if (h2g_valid(startaddr)) { - startaddr = h2g(startaddr) & TARGET_PAGE_MASK; - - if (h2g_valid(endaddr)) { - endaddr = h2g(endaddr); - page_set_flags(startaddr, endaddr, PAGE_RESERVED); - } else { -#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS - endaddr = ~0ul; - page_set_flags(startaddr, endaddr, PAGE_RESERVED); -#endif - } - } - } - free(freep); - mmap_unlock(); - } -#else - FILE *f; - - last_brk = (unsigned long)sbrk(0); - - f = fopen("/compat/linux/proc/self/maps", "r"); - if (f) { - mmap_lock(); - - do { - unsigned long startaddr, endaddr; - int n; - - n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr); - - if (n == 2 && h2g_valid(startaddr)) { - startaddr = h2g(startaddr) & TARGET_PAGE_MASK; - - if (h2g_valid(endaddr)) { - endaddr = h2g(endaddr); - } else { - endaddr = ~0ul; - } - page_set_flags(startaddr, endaddr, PAGE_RESERVED); - } - } while (!feof(f)); - - fclose(f); - mmap_unlock(); - } -#endif - } -#endif -} - -static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) -{ - PageDesc *pd; - void **lp; - int i; - -#if defined(CONFIG_USER_ONLY) - /* We can't use g_malloc because it may recurse into a locked mutex. */ -# define ALLOC(P, SIZE) \ - do { \ - P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \ - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \ - } while (0) -#else -# define ALLOC(P, SIZE) \ - do { P = g_malloc0(SIZE); } while (0) -#endif - - /* Level 1. Always allocated. */ - lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1)); - - /* Level 2..N-1. */ - for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) { - void **p = *lp; - - if (p == NULL) { - if (!alloc) { - return NULL; - } - ALLOC(p, sizeof(void *) * L2_SIZE); - *lp = p; - } - - lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1)); - } - - pd = *lp; - if (pd == NULL) { - if (!alloc) { - return NULL; - } - ALLOC(pd, sizeof(PageDesc) * L2_SIZE); - *lp = pd; - } - -#undef ALLOC - - return pd + (index & (L2_SIZE - 1)); -} - -static inline PageDesc *page_find(tb_page_addr_t index) -{ - return page_find_alloc(index, 0); -} #if !defined(CONFIG_USER_ONLY) @@ -474,177 +206,8 @@ bool memory_region_is_unassigned(MemoryRegion *mr) && mr != &io_mem_notdirty && !mr->rom_device && mr != &io_mem_watch; } - -#define mmap_lock() do { } while(0) -#define mmap_unlock() do { } while(0) -#endif - -#if defined(CONFIG_USER_ONLY) -/* Currently it is not recommended to allocate big chunks of data in - user mode. It will change when a dedicated libc will be used. */ -/* ??? 64-bit hosts ought to have no problem mmaping data outside the - region in which the guest needs to run. Revisit this. */ -#define USE_STATIC_CODE_GEN_BUFFER #endif -/* ??? Should configure for this, not list operating systems here. */ -#if (defined(__linux__) \ - || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ - || defined(__DragonFly__) || defined(__OpenBSD__) \ - || defined(__NetBSD__)) -# define USE_MMAP -#endif - -/* Minimum size of the code gen buffer. This number is randomly chosen, - but not so small that we can't have a fair number of TB's live. */ -#define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024) - -/* Maximum size of the code gen buffer we'd like to use. Unless otherwise - indicated, this is constrained by the range of direct branches on the - host cpu, as used by the TCG implementation of goto_tb. */ -#if defined(__x86_64__) -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) -#elif defined(__sparc__) -# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) -#elif defined(__arm__) -# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024) -#elif defined(__s390x__) - /* We have a +- 4GB range on the branches; leave some slop. */ -# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) -#else -# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) -#endif - -#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) - -#define DEFAULT_CODE_GEN_BUFFER_SIZE \ - (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ - ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) - -static inline size_t size_code_gen_buffer(size_t tb_size) -{ - /* Size the buffer. */ - if (tb_size == 0) { -#ifdef USE_STATIC_CODE_GEN_BUFFER - tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; -#else - /* ??? Needs adjustments. */ - /* ??? If we relax the requirement that CONFIG_USER_ONLY use the - static buffer, we could size this on RESERVED_VA, on the text - segment size of the executable, or continue to use the default. */ - tb_size = (unsigned long)(ram_size / 4); -#endif - } - if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { - tb_size = MIN_CODE_GEN_BUFFER_SIZE; - } - if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { - tb_size = MAX_CODE_GEN_BUFFER_SIZE; - } - code_gen_buffer_size = tb_size; - return tb_size; -} - -#ifdef USE_STATIC_CODE_GEN_BUFFER -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] - __attribute__((aligned(CODE_GEN_ALIGN))); - -static inline void *alloc_code_gen_buffer(void) -{ - map_exec(static_code_gen_buffer, code_gen_buffer_size); - return static_code_gen_buffer; -} -#elif defined(USE_MMAP) -static inline void *alloc_code_gen_buffer(void) -{ - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - uintptr_t start = 0; - void *buf; - - /* Constrain the position of the buffer based on the host cpu. - Note that these addresses are chosen in concert with the - addresses assigned in the relevant linker script file. */ -# if defined(__PIE__) || defined(__PIC__) - /* Don't bother setting a preferred location if we're building - a position-independent executable. We're more likely to get - an address near the main executable if we let the kernel - choose the address. */ -# elif defined(__x86_64__) && defined(MAP_32BIT) - /* Force the memory down into low memory with the executable. - Leave the choice of exact location with the kernel. */ - flags |= MAP_32BIT; - /* Cannot expect to map more than 800MB in low memory. */ - if (code_gen_buffer_size > 800u * 1024 * 1024) { - code_gen_buffer_size = 800u * 1024 * 1024; - } -# elif defined(__sparc__) - start = 0x40000000ul; -# elif defined(__s390x__) - start = 0x90000000ul; -# endif - - buf = mmap((void *)start, code_gen_buffer_size, - PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0); - return buf == MAP_FAILED ? NULL : buf; -} -#else -static inline void *alloc_code_gen_buffer(void) -{ - void *buf = g_malloc(code_gen_buffer_size); - if (buf) { - map_exec(buf, code_gen_buffer_size); - } - return buf; -} -#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */ - -static inline void code_gen_alloc(size_t tb_size) -{ - code_gen_buffer_size = size_code_gen_buffer(tb_size); - code_gen_buffer = alloc_code_gen_buffer(); - if (code_gen_buffer == NULL) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); - } - - qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE); - - /* Steal room for the prologue at the end of the buffer. This ensures - (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches - from TB's to the prologue are going to be in range. It also means - that we don't need to mark (additional) portions of the data segment - as executable. */ - code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024; - code_gen_buffer_size -= 1024; - - code_gen_buffer_max_size = code_gen_buffer_size - - (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); - code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; - tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock)); -} - -/* Must be called before using the QEMU cpus. 'tb_size' is the size - (in bytes) allocated to the translation buffer. Zero means default - size. */ -void tcg_exec_init(unsigned long tb_size) -{ - cpu_gen_init(); - code_gen_alloc(tb_size); - code_gen_ptr = code_gen_buffer; - tcg_register_jit(code_gen_buffer, code_gen_buffer_size); - page_init(); -#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) - /* There's no guest base to take into account, so go ahead and - initialize the prologue now. */ - tcg_prologue_init(&tcg_ctx); -#endif -} - -bool tcg_enabled(void) -{ - return code_gen_buffer != NULL; -} - void cpu_exec_init_all(void) { #if !defined(CONFIG_USER_ONLY) @@ -730,763 +293,6 @@ void cpu_exec_init(CPUArchState *env) #endif } -/* Allocate a new translation block. Flush the translation buffer if - too many translation blocks or too much generated code. */ -static TranslationBlock *tb_alloc(target_ulong pc) -{ - TranslationBlock *tb; - - if (nb_tbs >= code_gen_max_blocks || - (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) - return NULL; - tb = &tbs[nb_tbs++]; - tb->pc = pc; - tb->cflags = 0; - return tb; -} - -void tb_free(TranslationBlock *tb) -{ - /* In practice this is mostly used for single use temporary TB - Ignore the hard cases and just back up if this TB happens to - be the last one generated. */ - if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) { - code_gen_ptr = tb->tc_ptr; - nb_tbs--; - } -} - -static inline void invalidate_page_bitmap(PageDesc *p) -{ - if (p->code_bitmap) { - g_free(p->code_bitmap); - p->code_bitmap = NULL; - } - p->code_write_count = 0; -} - -/* Set to NULL all the 'first_tb' fields in all PageDescs. */ - -static void page_flush_tb_1 (int level, void **lp) -{ - int i; - - if (*lp == NULL) { - return; - } - if (level == 0) { - PageDesc *pd = *lp; - for (i = 0; i < L2_SIZE; ++i) { - pd[i].first_tb = NULL; - invalidate_page_bitmap(pd + i); - } - } else { - void **pp = *lp; - for (i = 0; i < L2_SIZE; ++i) { - page_flush_tb_1 (level - 1, pp + i); - } - } -} - -static void page_flush_tb(void) -{ - int i; - for (i = 0; i < V_L1_SIZE; i++) { - page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i); - } -} - -/* flush all the translation blocks */ -/* XXX: tb_flush is currently not thread safe */ -void tb_flush(CPUArchState *env1) -{ - CPUArchState *env; -#if defined(DEBUG_FLUSH) - printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", - (unsigned long)(code_gen_ptr - code_gen_buffer), - nb_tbs, nb_tbs > 0 ? - ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0); -#endif - if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size) - cpu_abort(env1, "Internal error: code buffer overflow\n"); - - nb_tbs = 0; - - for(env = first_cpu; env != NULL; env = env->next_cpu) { - memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *)); - } - - memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *)); - page_flush_tb(); - - code_gen_ptr = code_gen_buffer; - /* XXX: flush processor icache at this point if cache flush is - expensive */ - tb_flush_count++; -} - -#ifdef DEBUG_TB_CHECK - -static void tb_invalidate_check(target_ulong address) -{ - TranslationBlock *tb; - int i; - address &= TARGET_PAGE_MASK; - for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { - if (!(address + TARGET_PAGE_SIZE <= tb->pc || - address >= tb->pc + tb->size)) { - printf("ERROR invalidate: address=" TARGET_FMT_lx - " PC=%08lx size=%04x\n", - address, (long)tb->pc, tb->size); - } - } - } -} - -/* verify that all the pages have correct rights for code */ -static void tb_page_check(void) -{ - TranslationBlock *tb; - int i, flags1, flags2; - - for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { - flags1 = page_get_flags(tb->pc); - flags2 = page_get_flags(tb->pc + tb->size - 1); - if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { - printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", - (long)tb->pc, tb->size, flags1, flags2); - } - } - } -} - -#endif - -/* invalidate one TB */ -static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, - int next_offset) -{ - TranslationBlock *tb1; - for(;;) { - tb1 = *ptb; - if (tb1 == tb) { - *ptb = *(TranslationBlock **)((char *)tb1 + next_offset); - break; - } - ptb = (TranslationBlock **)((char *)tb1 + next_offset); - } -} - -static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) -{ - TranslationBlock *tb1; - unsigned int n1; - - for(;;) { - tb1 = *ptb; - n1 = (uintptr_t)tb1 & 3; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); - if (tb1 == tb) { - *ptb = tb1->page_next[n1]; - break; - } - ptb = &tb1->page_next[n1]; - } -} - -static inline void tb_jmp_remove(TranslationBlock *tb, int n) -{ - TranslationBlock *tb1, **ptb; - unsigned int n1; - - ptb = &tb->jmp_next[n]; - tb1 = *ptb; - if (tb1) { - /* find tb(n) in circular list */ - for(;;) { - tb1 = *ptb; - n1 = (uintptr_t)tb1 & 3; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); - if (n1 == n && tb1 == tb) - break; - if (n1 == 2) { - ptb = &tb1->jmp_first; - } else { - ptb = &tb1->jmp_next[n1]; - } - } - /* now we can suppress tb(n) from the list */ - *ptb = tb->jmp_next[n]; - - tb->jmp_next[n] = NULL; - } -} - -/* reset the jump entry 'n' of a TB so that it is not chained to - another TB */ -static inline void tb_reset_jump(TranslationBlock *tb, int n) -{ - tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); -} - -void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) -{ - CPUArchState *env; - PageDesc *p; - unsigned int h, n1; - tb_page_addr_t phys_pc; - TranslationBlock *tb1, *tb2; - - /* remove the TB from the hash list */ - phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_phys_hash_func(phys_pc); - tb_remove(&tb_phys_hash[h], tb, - offsetof(TranslationBlock, phys_hash_next)); - - /* remove the TB from the page list */ - if (tb->page_addr[0] != page_addr) { - p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); - tb_page_remove(&p->first_tb, tb); - invalidate_page_bitmap(p); - } - if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { - p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); - tb_page_remove(&p->first_tb, tb); - invalidate_page_bitmap(p); - } - - tb_invalidated_flag = 1; - - /* remove the TB from the hash list */ - h = tb_jmp_cache_hash_func(tb->pc); - for(env = first_cpu; env != NULL; env = env->next_cpu) { - if (env->tb_jmp_cache[h] == tb) - env->tb_jmp_cache[h] = NULL; - } - - /* suppress this TB from the two jump lists */ - tb_jmp_remove(tb, 0); - tb_jmp_remove(tb, 1); - - /* suppress any remaining jumps to this TB */ - tb1 = tb->jmp_first; - for(;;) { - n1 = (uintptr_t)tb1 & 3; - if (n1 == 2) - break; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); - tb2 = tb1->jmp_next[n1]; - tb_reset_jump(tb1, n1); - tb1->jmp_next[n1] = NULL; - tb1 = tb2; - } - tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ - - tb_phys_invalidate_count++; -} - -static inline void set_bits(uint8_t *tab, int start, int len) -{ - int end, mask, end1; - - end = start + len; - tab += start >> 3; - mask = 0xff << (start & 7); - if ((start & ~7) == (end & ~7)) { - if (start < end) { - mask &= ~(0xff << (end & 7)); - *tab |= mask; - } - } else { - *tab++ |= mask; - start = (start + 8) & ~7; - end1 = end & ~7; - while (start < end1) { - *tab++ = 0xff; - start += 8; - } - if (start < end) { - mask = ~(0xff << (end & 7)); - *tab |= mask; - } - } -} - -static void build_page_bitmap(PageDesc *p) -{ - int n, tb_start, tb_end; - TranslationBlock *tb; - - p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8); - - tb = p->first_tb; - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); - /* NOTE: this is subtle as a TB may span two physical pages */ - if (n == 0) { - /* NOTE: tb_end may be after the end of the page, but - it is not a problem */ - tb_start = tb->pc & ~TARGET_PAGE_MASK; - tb_end = tb_start + tb->size; - if (tb_end > TARGET_PAGE_SIZE) - tb_end = TARGET_PAGE_SIZE; - } else { - tb_start = 0; - tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); - } - set_bits(p->code_bitmap, tb_start, tb_end - tb_start); - tb = tb->page_next[n]; - } -} - -TranslationBlock *tb_gen_code(CPUArchState *env, - target_ulong pc, target_ulong cs_base, - int flags, int cflags) -{ - TranslationBlock *tb; - uint8_t *tc_ptr; - tb_page_addr_t phys_pc, phys_page2; - target_ulong virt_page2; - int code_gen_size; - - phys_pc = get_page_addr_code(env, pc); - tb = tb_alloc(pc); - if (!tb) { - /* flush must be done */ - tb_flush(env); - /* cannot fail at this point */ - tb = tb_alloc(pc); - /* Don't forget to invalidate previous TB info. */ - tb_invalidated_flag = 1; - } - tc_ptr = code_gen_ptr; - tb->tc_ptr = tc_ptr; - tb->cs_base = cs_base; - tb->flags = flags; - tb->cflags = cflags; - cpu_gen_code(env, tb, &code_gen_size); - code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size + - CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); - - /* check next page if needed */ - virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; - phys_page2 = -1; - if ((pc & TARGET_PAGE_MASK) != virt_page2) { - phys_page2 = get_page_addr_code(env, virt_page2); - } - tb_link_page(tb, phys_pc, phys_page2); - return tb; -} - -/* - * Invalidate all TBs which intersect with the target physical address range - * [start;end[. NOTE: start and end may refer to *different* physical pages. - * 'is_cpu_write_access' should be true if called from a real cpu write - * access: the virtual CPU will exit the current TB if code is modified inside - * this TB. - */ -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) -{ - while (start < end) { - tb_invalidate_phys_page_range(start, end, is_cpu_write_access); - start &= TARGET_PAGE_MASK; - start += TARGET_PAGE_SIZE; - } -} - -/* - * Invalidate all TBs which intersect with the target physical address range - * [start;end[. NOTE: start and end must refer to the *same* physical page. - * 'is_cpu_write_access' should be true if called from a real cpu write - * access: the virtual CPU will exit the current TB if code is modified inside - * this TB. - */ -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) -{ - TranslationBlock *tb, *tb_next, *saved_tb; - CPUArchState *env = cpu_single_env; - tb_page_addr_t tb_start, tb_end; - PageDesc *p; - int n; -#ifdef TARGET_HAS_PRECISE_SMC - int current_tb_not_found = is_cpu_write_access; - TranslationBlock *current_tb = NULL; - int current_tb_modified = 0; - target_ulong current_pc = 0; - target_ulong current_cs_base = 0; - int current_flags = 0; -#endif /* TARGET_HAS_PRECISE_SMC */ - - p = page_find(start >> TARGET_PAGE_BITS); - if (!p) - return; - if (!p->code_bitmap && - ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD && - is_cpu_write_access) { - /* build code bitmap */ - build_page_bitmap(p); - } - - /* we remove all the TBs in the range [start, end[ */ - /* XXX: see if in some cases it could be faster to invalidate all the code */ - tb = p->first_tb; - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); - tb_next = tb->page_next[n]; - /* NOTE: this is subtle as a TB may span two physical pages */ - if (n == 0) { - /* NOTE: tb_end may be after the end of the page, but - it is not a problem */ - tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - tb_end = tb_start + tb->size; - } else { - tb_start = tb->page_addr[1]; - tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); - } - if (!(tb_end <= start || tb_start >= end)) { -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb_not_found) { - current_tb_not_found = 0; - current_tb = NULL; - if (env->mem_io_pc) { - /* now we have a real cpu fault */ - current_tb = tb_find_pc(env->mem_io_pc); - } - } - if (current_tb == tb && - (current_tb->cflags & CF_COUNT_MASK) != 1) { - /* If we are modifying the current TB, we must stop - its execution. We could be more precise by checking - that the modification is after the current PC, but it - would require a specialized function to partially - restore the CPU state */ - - current_tb_modified = 1; - cpu_restore_state(current_tb, env, env->mem_io_pc); - cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, - ¤t_flags); - } -#endif /* TARGET_HAS_PRECISE_SMC */ - /* we need to do that to handle the case where a signal - occurs while doing tb_phys_invalidate() */ - saved_tb = NULL; - if (env) { - saved_tb = env->current_tb; - env->current_tb = NULL; - } - tb_phys_invalidate(tb, -1); - if (env) { - env->current_tb = saved_tb; - if (env->interrupt_request && env->current_tb) - cpu_interrupt(env, env->interrupt_request); - } - } - tb = tb_next; - } -#if !defined(CONFIG_USER_ONLY) - /* if no code remaining, no need to continue to use slow writes */ - if (!p->first_tb) { - invalidate_page_bitmap(p); - if (is_cpu_write_access) { - tlb_unprotect_code_phys(env, start, env->mem_io_vaddr); - } - } -#endif -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - env->current_tb = NULL; - tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); - cpu_resume_from_signal(env, NULL); - } -#endif -} - -/* len must be <= 8 and start must be a multiple of len */ -static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) -{ - PageDesc *p; - int offset, b; -#if 0 - if (1) { - qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n", - cpu_single_env->mem_io_vaddr, len, - cpu_single_env->eip, - cpu_single_env->eip + - (intptr_t)cpu_single_env->segs[R_CS].base); - } -#endif - p = page_find(start >> TARGET_PAGE_BITS); - if (!p) - return; - if (p->code_bitmap) { - offset = start & ~TARGET_PAGE_MASK; - b = p->code_bitmap[offset >> 3] >> (offset & 7); - if (b & ((1 << len) - 1)) - goto do_invalidate; - } else { - do_invalidate: - tb_invalidate_phys_page_range(start, start + len, 1); - } -} - -#if !defined(CONFIG_SOFTMMU) -static void tb_invalidate_phys_page(tb_page_addr_t addr, - uintptr_t pc, void *puc) -{ - TranslationBlock *tb; - PageDesc *p; - int n; -#ifdef TARGET_HAS_PRECISE_SMC - TranslationBlock *current_tb = NULL; - CPUArchState *env = cpu_single_env; - int current_tb_modified = 0; - target_ulong current_pc = 0; - target_ulong current_cs_base = 0; - int current_flags = 0; -#endif - - addr &= TARGET_PAGE_MASK; - p = page_find(addr >> TARGET_PAGE_BITS); - if (!p) - return; - tb = p->first_tb; -#ifdef TARGET_HAS_PRECISE_SMC - if (tb && pc != 0) { - current_tb = tb_find_pc(pc); - } -#endif - while (tb != NULL) { - n = (uintptr_t)tb & 3; - tb = (TranslationBlock *)((uintptr_t)tb & ~3); -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb == tb && - (current_tb->cflags & CF_COUNT_MASK) != 1) { - /* If we are modifying the current TB, we must stop - its execution. We could be more precise by checking - that the modification is after the current PC, but it - would require a specialized function to partially - restore the CPU state */ - - current_tb_modified = 1; - cpu_restore_state(current_tb, env, pc); - cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, - ¤t_flags); - } -#endif /* TARGET_HAS_PRECISE_SMC */ - tb_phys_invalidate(tb, addr); - tb = tb->page_next[n]; - } - p->first_tb = NULL; -#ifdef TARGET_HAS_PRECISE_SMC - if (current_tb_modified) { - /* we generate a block containing just the instruction - modifying the memory. It will ensure that it cannot modify - itself */ - env->current_tb = NULL; - tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); - cpu_resume_from_signal(env, puc); - } -#endif -} -#endif - -/* add the tb in the target page and protect it if necessary */ -static inline void tb_alloc_page(TranslationBlock *tb, - unsigned int n, tb_page_addr_t page_addr) -{ - PageDesc *p; -#ifndef CONFIG_USER_ONLY - bool page_already_protected; -#endif - - tb->page_addr[n] = page_addr; - p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); - tb->page_next[n] = p->first_tb; -#ifndef CONFIG_USER_ONLY - page_already_protected = p->first_tb != NULL; -#endif - p->first_tb = (TranslationBlock *)((uintptr_t)tb | n); - invalidate_page_bitmap(p); - -#if defined(TARGET_HAS_SMC) || 1 - -#if defined(CONFIG_USER_ONLY) - if (p->flags & PAGE_WRITE) { - target_ulong addr; - PageDesc *p2; - int prot; - - /* force the host page as non writable (writes will have a - page fault + mprotect overhead) */ - page_addr &= qemu_host_page_mask; - prot = 0; - for(addr = page_addr; addr < page_addr + qemu_host_page_size; - addr += TARGET_PAGE_SIZE) { - - p2 = page_find (addr >> TARGET_PAGE_BITS); - if (!p2) - continue; - prot |= p2->flags; - p2->flags &= ~PAGE_WRITE; - } - mprotect(g2h(page_addr), qemu_host_page_size, - (prot & PAGE_BITS) & ~PAGE_WRITE); -#ifdef DEBUG_TB_INVALIDATE - printf("protecting code page: 0x" TARGET_FMT_lx "\n", - page_addr); -#endif - } -#else - /* if some code is already present, then the pages are already - protected. So we handle the case where only the first TB is - allocated in a physical page */ - if (!page_already_protected) { - tlb_protect_code(page_addr); - } -#endif - -#endif /* TARGET_HAS_SMC */ -} - -/* add a new TB and link it to the physical page tables. phys_page2 is - (-1) to indicate that only one page contains the TB. */ -static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, - tb_page_addr_t phys_page2) -{ - unsigned int h; - TranslationBlock **ptb; - - /* Grab the mmap lock to stop another thread invalidating this TB - before we are done. */ - mmap_lock(); - /* add in the physical hash table */ - h = tb_phys_hash_func(phys_pc); - ptb = &tb_phys_hash[h]; - tb->phys_hash_next = *ptb; - *ptb = tb; - - /* add in the page list */ - tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); - if (phys_page2 != -1) - tb_alloc_page(tb, 1, phys_page2); - else - tb->page_addr[1] = -1; - - tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); - tb->jmp_next[0] = NULL; - tb->jmp_next[1] = NULL; - - /* init original jump addresses */ - if (tb->tb_next_offset[0] != 0xffff) - tb_reset_jump(tb, 0); - if (tb->tb_next_offset[1] != 0xffff) - tb_reset_jump(tb, 1); - -#ifdef DEBUG_TB_CHECK - tb_page_check(); -#endif - mmap_unlock(); -} - -#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) -/* check whether the given addr is in TCG generated code buffer or not */ -bool is_tcg_gen_code(uintptr_t tc_ptr) -{ - /* This can be called during code generation, code_gen_buffer_max_size - is used instead of code_gen_ptr for upper boundary checking */ - return (tc_ptr >= (uintptr_t)code_gen_buffer && - tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size)); -} -#endif - -/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < - tb[1].tc_ptr. Return NULL if not found */ -TranslationBlock *tb_find_pc(uintptr_t tc_ptr) -{ - int m_min, m_max, m; - uintptr_t v; - TranslationBlock *tb; - - if (nb_tbs <= 0) - return NULL; - if (tc_ptr < (uintptr_t)code_gen_buffer || - tc_ptr >= (uintptr_t)code_gen_ptr) { - return NULL; - } - /* binary search (cf Knuth) */ - m_min = 0; - m_max = nb_tbs - 1; - while (m_min <= m_max) { - m = (m_min + m_max) >> 1; - tb = &tbs[m]; - v = (uintptr_t)tb->tc_ptr; - if (v == tc_ptr) - return tb; - else if (tc_ptr < v) { - m_max = m - 1; - } else { - m_min = m + 1; - } - } - return &tbs[m_max]; -} - -static void tb_reset_jump_recursive(TranslationBlock *tb); - -static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n) -{ - TranslationBlock *tb1, *tb_next, **ptb; - unsigned int n1; - - tb1 = tb->jmp_next[n]; - if (tb1 != NULL) { - /* find head of list */ - for(;;) { - n1 = (uintptr_t)tb1 & 3; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); - if (n1 == 2) - break; - tb1 = tb1->jmp_next[n1]; - } - /* we are now sure now that tb jumps to tb1 */ - tb_next = tb1; - - /* remove tb from the jmp_first list */ - ptb = &tb_next->jmp_first; - for(;;) { - tb1 = *ptb; - n1 = (uintptr_t)tb1 & 3; - tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); - if (n1 == n && tb1 == tb) - break; - ptb = &tb1->jmp_next[n1]; - } - *ptb = tb->jmp_next[n]; - tb->jmp_next[n] = NULL; - - /* suppress the jump to next tb in generated code */ - tb_reset_jump(tb, n); - - /* suppress jumps in the tb on which we could have jumped */ - tb_reset_jump_recursive(tb_next); - } -} - -static void tb_reset_jump_recursive(TranslationBlock *tb) -{ - tb_reset_jump_recursive2(tb, 0); - tb_reset_jump_recursive2(tb, 1); -} - #if defined(TARGET_HAS_ICE) #if defined(CONFIG_USER_ONLY) static void breakpoint_invalidate(CPUArchState *env, target_ulong pc) @@ -1494,21 +300,6 @@ static void breakpoint_invalidate(CPUArchState *env, target_ulong pc) tb_invalidate_phys_page_range(pc, pc + 1, 0); } #else -void tb_invalidate_phys_addr(hwaddr addr) -{ - ram_addr_t ram_addr; - MemoryRegionSection *section; - - section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS); - if (!(memory_region_is_ram(section->mr) - || (section->mr->rom_device && section->mr->readable))) { - return; - } - ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) - + memory_region_section_addr(section, addr); - tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); -} - static void breakpoint_invalidate(CPUArchState *env, target_ulong pc) { tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) | @@ -1690,67 +481,6 @@ void cpu_single_step(CPUArchState *env, int enabled) #endif } -static void cpu_unlink_tb(CPUArchState *env) -{ - /* FIXME: TB unchaining isn't SMP safe. For now just ignore the - problem and hope the cpu will stop of its own accord. For userspace - emulation this often isn't actually as bad as it sounds. Often - signals are used primarily to interrupt blocking syscalls. */ - TranslationBlock *tb; - static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED; - - spin_lock(&interrupt_lock); - tb = env->current_tb; - /* if the cpu is currently executing code, we must unlink it and - all the potentially executing TB */ - if (tb) { - env->current_tb = NULL; - tb_reset_jump_recursive(tb); - } - spin_unlock(&interrupt_lock); -} - -#ifndef CONFIG_USER_ONLY -/* mask must never be zero, except for A20 change call */ -static void tcg_handle_interrupt(CPUArchState *env, int mask) -{ - CPUState *cpu = ENV_GET_CPU(env); - int old_mask; - - old_mask = env->interrupt_request; - env->interrupt_request |= mask; - - /* - * If called from iothread context, wake the target cpu in - * case its halted. - */ - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - return; - } - - if (use_icount) { - env->icount_decr.u16.high = 0xffff; - if (!can_do_io(env) - && (mask & ~old_mask) != 0) { - cpu_abort(env, "Raised interrupt while not in I/O function"); - } - } else { - cpu_unlink_tb(env); - } -} - -CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt; - -#else /* CONFIG_USER_ONLY */ - -void cpu_interrupt(CPUArchState *env, int mask) -{ - env->interrupt_request |= mask; - cpu_unlink_tb(env); -} -#endif /* CONFIG_USER_ONLY */ - void cpu_reset_interrupt(CPUArchState *env, int mask) { env->interrupt_request &= ~mask; @@ -1829,21 +559,6 @@ CPUArchState *cpu_copy(CPUArchState *env) } #if !defined(CONFIG_USER_ONLY) -void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr) -{ - unsigned int i; - - /* Discard jump cache entries for any tb which might potentially - overlap the flushed page. */ - i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); - memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); - - i = tb_jmp_cache_hash_page(addr); - memset (&env->tb_jmp_cache[i], 0, - TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); -} - static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end, uintptr_t length) { @@ -1933,264 +648,6 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env, return iotlb; } - -#else -/* - * Walks guest process memory "regions" one by one - * and calls callback function 'fn' for each region. - */ - -struct walk_memory_regions_data -{ - walk_memory_regions_fn fn; - void *priv; - uintptr_t start; - int prot; -}; - -static int walk_memory_regions_end(struct walk_memory_regions_data *data, - abi_ulong end, int new_prot) -{ - if (data->start != -1ul) { - int rc = data->fn(data->priv, data->start, end, data->prot); - if (rc != 0) { - return rc; - } - } - - data->start = (new_prot ? end : -1ul); - data->prot = new_prot; - - return 0; -} - -static int walk_memory_regions_1(struct walk_memory_regions_data *data, - abi_ulong base, int level, void **lp) -{ - abi_ulong pa; - int i, rc; - - if (*lp == NULL) { - return walk_memory_regions_end(data, base, 0); - } - - if (level == 0) { - PageDesc *pd = *lp; - for (i = 0; i < L2_SIZE; ++i) { - int prot = pd[i].flags; - - pa = base | (i << TARGET_PAGE_BITS); - if (prot != data->prot) { - rc = walk_memory_regions_end(data, pa, prot); - if (rc != 0) { - return rc; - } - } - } - } else { - void **pp = *lp; - for (i = 0; i < L2_SIZE; ++i) { - pa = base | ((abi_ulong)i << - (TARGET_PAGE_BITS + L2_BITS * level)); - rc = walk_memory_regions_1(data, pa, level - 1, pp + i); - if (rc != 0) { - return rc; - } - } - } - - return 0; -} - -int walk_memory_regions(void *priv, walk_memory_regions_fn fn) -{ - struct walk_memory_regions_data data; - uintptr_t i; - - data.fn = fn; - data.priv = priv; - data.start = -1ul; - data.prot = 0; - - for (i = 0; i < V_L1_SIZE; i++) { - int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT, - V_L1_SHIFT / L2_BITS - 1, l1_map + i); - if (rc != 0) { - return rc; - } - } - - return walk_memory_regions_end(&data, 0, 0); -} - -static int dump_region(void *priv, abi_ulong start, - abi_ulong end, unsigned long prot) -{ - FILE *f = (FILE *)priv; - - (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx - " "TARGET_ABI_FMT_lx" %c%c%c\n", - start, end, end - start, - ((prot & PAGE_READ) ? 'r' : '-'), - ((prot & PAGE_WRITE) ? 'w' : '-'), - ((prot & PAGE_EXEC) ? 'x' : '-')); - - return (0); -} - -/* dump memory mappings */ -void page_dump(FILE *f) -{ - (void) fprintf(f, "%-8s %-8s %-8s %s\n", - "start", "end", "size", "prot"); - walk_memory_regions(f, dump_region); -} - -int page_get_flags(target_ulong address) -{ - PageDesc *p; - - p = page_find(address >> TARGET_PAGE_BITS); - if (!p) - return 0; - return p->flags; -} - -/* Modify the flags of a page and invalidate the code if necessary. - The flag PAGE_WRITE_ORG is positioned automatically depending - on PAGE_WRITE. The mmap_lock should already be held. */ -void page_set_flags(target_ulong start, target_ulong end, int flags) -{ - target_ulong addr, len; - - /* This function should never be called with addresses outside the - guest address space. If this assert fires, it probably indicates - a missing call to h2g_valid. */ -#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS - assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); -#endif - assert(start < end); - - start = start & TARGET_PAGE_MASK; - end = TARGET_PAGE_ALIGN(end); - - if (flags & PAGE_WRITE) { - flags |= PAGE_WRITE_ORG; - } - - for (addr = start, len = end - start; - len != 0; - len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { - PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1); - - /* If the write protection bit is set, then we invalidate - the code inside. */ - if (!(p->flags & PAGE_WRITE) && - (flags & PAGE_WRITE) && - p->first_tb) { - tb_invalidate_phys_page(addr, 0, NULL); - } - p->flags = flags; - } -} - -int page_check_range(target_ulong start, target_ulong len, int flags) -{ - PageDesc *p; - target_ulong end; - target_ulong addr; - - /* This function should never be called with addresses outside the - guest address space. If this assert fires, it probably indicates - a missing call to h2g_valid. */ -#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS - assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); -#endif - - if (len == 0) { - return 0; - } - if (start + len - 1 < start) { - /* We've wrapped around. */ - return -1; - } - - end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */ - start = start & TARGET_PAGE_MASK; - - for (addr = start, len = end - start; - len != 0; - len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { - p = page_find(addr >> TARGET_PAGE_BITS); - if( !p ) - return -1; - if( !(p->flags & PAGE_VALID) ) - return -1; - - if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) - return -1; - if (flags & PAGE_WRITE) { - if (!(p->flags & PAGE_WRITE_ORG)) - return -1; - /* unprotect the page if it was put read-only because it - contains translated code */ - if (!(p->flags & PAGE_WRITE)) { - if (!page_unprotect(addr, 0, NULL)) - return -1; - } - return 0; - } - } - return 0; -} - -/* called from signal handler: invalidate the code and unprotect the - page. Return TRUE if the fault was successfully handled. */ -int page_unprotect(target_ulong address, uintptr_t pc, void *puc) -{ - unsigned int prot; - PageDesc *p; - target_ulong host_start, host_end, addr; - - /* Technically this isn't safe inside a signal handler. However we - know this only ever happens in a synchronous SEGV handler, so in - practice it seems to be ok. */ - mmap_lock(); - - p = page_find(address >> TARGET_PAGE_BITS); - if (!p) { - mmap_unlock(); - return 0; - } - - /* if the page was really writable, then we change its - protection back to writable */ - if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) { - host_start = address & qemu_host_page_mask; - host_end = host_start + qemu_host_page_size; - - prot = 0; - for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) { - p = page_find(addr >> TARGET_PAGE_BITS); - p->flags |= PAGE_WRITE; - prot |= p->flags; - - /* and since the content will be modified, we must invalidate - the corresponding translated code. */ - tb_invalidate_phys_page(addr, pc, puc); -#ifdef DEBUG_TB_CHECK - tb_invalidate_check(addr); -#endif - } - mprotect((void *)g2h(host_start), qemu_host_page_size, - prot & PAGE_BITS); - - mmap_unlock(); - return 1; - } - mmap_unlock(); - return 0; -} #endif /* defined(CONFIG_USER_ONLY) */ #if !defined(CONFIG_USER_ONLY) @@ -2954,7 +1411,6 @@ static void check_watchpoint(int offset, int len_mask, int flags) { CPUArchState *env = cpu_single_env; target_ulong pc, cs_base; - TranslationBlock *tb; target_ulong vaddr; CPUWatchpoint *wp; int cpu_flags; @@ -2973,13 +1429,7 @@ static void check_watchpoint(int offset, int len_mask, int flags) wp->flags |= BP_WATCHPOINT_HIT; if (!env->watchpoint_hit) { env->watchpoint_hit = wp; - tb = tb_find_pc(env->mem_io_pc); - if (!tb) { - cpu_abort(env, "check_watchpoint: could not find TB for " - "pc=%p", (void *)env->mem_io_pc); - } - cpu_restore_state(tb, env, env->mem_io_pc); - tb_phys_invalidate(tb, -1); + tb_check_watchpoint(env); if (wp->flags & BP_STOP_BEFORE_ACCESS) { env->exception_index = EXCP_DEBUG; cpu_loop_exit(env); @@ -4090,119 +2540,8 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, } #endif -/* in deterministic execution mode, instructions doing device I/Os - must be at the end of the TB */ -void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr) -{ - TranslationBlock *tb; - uint32_t n, cflags; - target_ulong pc, cs_base; - uint64_t flags; - - tb = tb_find_pc(retaddr); - if (!tb) { - cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", - (void *)retaddr); - } - n = env->icount_decr.u16.low + tb->icount; - cpu_restore_state(tb, env, retaddr); - /* Calculate how many instructions had been executed before the fault - occurred. */ - n = n - env->icount_decr.u16.low; - /* Generate a new TB ending on the I/O insn. */ - n++; - /* On MIPS and SH, delay slot instructions can only be restarted if - they were already the first instruction in the TB. If this is not - the first instruction in a TB then re-execute the preceding - branch. */ -#if defined(TARGET_MIPS) - if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) { - env->active_tc.PC -= 4; - env->icount_decr.u16.low++; - env->hflags &= ~MIPS_HFLAG_BMASK; - } -#elif defined(TARGET_SH4) - if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 - && n > 1) { - env->pc -= 2; - env->icount_decr.u16.low++; - env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); - } -#endif - /* This should never happen. */ - if (n > CF_COUNT_MASK) - cpu_abort(env, "TB too big during recompile"); - - cflags = n | CF_LAST_IO; - pc = tb->pc; - cs_base = tb->cs_base; - flags = tb->flags; - tb_phys_invalidate(tb, -1); - /* FIXME: In theory this could raise an exception. In practice - we have already translated the block once so it's probably ok. */ - tb_gen_code(env, pc, cs_base, flags, cflags); - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not - the first in the TB) then we end up generating a whole new TB and - repeating the fault, which is horribly inefficient. - Better would be to execute just this insn uncached, or generate a - second new TB. */ - cpu_resume_from_signal(env, NULL); -} - #if !defined(CONFIG_USER_ONLY) -void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) -{ - int i, target_code_size, max_target_code_size; - int direct_jmp_count, direct_jmp2_count, cross_page; - TranslationBlock *tb; - - target_code_size = 0; - max_target_code_size = 0; - cross_page = 0; - direct_jmp_count = 0; - direct_jmp2_count = 0; - for(i = 0; i < nb_tbs; i++) { - tb = &tbs[i]; - target_code_size += tb->size; - if (tb->size > max_target_code_size) - max_target_code_size = tb->size; - if (tb->page_addr[1] != -1) - cross_page++; - if (tb->tb_next_offset[0] != 0xffff) { - direct_jmp_count++; - if (tb->tb_next_offset[1] != 0xffff) { - direct_jmp2_count++; - } - } - } - /* XXX: avoid using doubles ? */ - cpu_fprintf(f, "Translation buffer state:\n"); - cpu_fprintf(f, "gen code size %td/%zd\n", - code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size); - cpu_fprintf(f, "TB count %d/%d\n", - nb_tbs, code_gen_max_blocks); - cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", - nb_tbs ? target_code_size / nb_tbs : 0, - max_target_code_size); - cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", - nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0, - target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0); - cpu_fprintf(f, "cross page TB count %d (%d%%)\n", - cross_page, - nb_tbs ? (cross_page * 100) / nb_tbs : 0); - cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", - direct_jmp_count, - nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0, - direct_jmp2_count, - nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0); - cpu_fprintf(f, "\nStatistics:\n"); - cpu_fprintf(f, "TB flush count %d\n", tb_flush_count); - cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count); - cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); - tcg_dump_info(f, cpu_fprintf); -} - /* * A helper function for the _utterly broken_ virtio device model to find out if * it's running on a big endian machine. Don't do this at home kids! diff --git a/hw/Makefile.objs b/hw/Makefile.objs index 9d33b189e6..bcf278d4ec 100644 --- a/hw/Makefile.objs +++ b/hw/Makefile.objs @@ -37,6 +37,8 @@ common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o common-obj-y += fifo.o common-obj-y += pam.o +extra-obj-y += pci/ + # PPC devices common-obj-$(CONFIG_PREP_PCI) += prep_pci.o common-obj-$(CONFIG_I82378) += i82378.o diff --git a/hw/arm_boot.c b/hw/arm_boot.c index 92e2cab476..ec3b8d5d12 100644 --- a/hw/arm_boot.c +++ b/hw/arm_boot.c @@ -44,11 +44,17 @@ static uint32_t bootloader[] = { * for an interprocessor interrupt and polling a configurable * location for the kernel secondary CPU entry point. */ +#define DSB_INSN 0xf57ff04f +#define CP15_DSB_INSN 0xee070f9a /* mcr cp15, 0, r0, c7, c10, 4 */ + static uint32_t smpboot[] = { - 0xe59f201c, /* ldr r2, gic_cpu_if */ - 0xe59f001c, /* ldr r0, startaddr */ + 0xe59f2028, /* ldr r2, gic_cpu_if */ + 0xe59f0028, /* ldr r0, startaddr */ 0xe3a01001, /* mov r1, #1 */ - 0xe5821000, /* str r1, [r2] */ + 0xe5821000, /* str r1, [r2] - set GICC_CTLR.Enable */ + 0xe3a010ff, /* mov r1, #0xff */ + 0xe5821004, /* str r1, [r2, 4] - set GIC_PMR.Priority to 0xff */ + DSB_INSN, /* dsb */ 0xe320f003, /* wfi */ 0xe5901000, /* ldr r1, [r0] */ 0xe1110001, /* tst r1, r1 */ @@ -65,6 +71,11 @@ static void default_write_secondary(ARMCPU *cpu, smpboot[ARRAY_SIZE(smpboot) - 1] = info->smp_bootreg_addr; smpboot[ARRAY_SIZE(smpboot) - 2] = info->gic_cpu_if_addr; for (n = 0; n < ARRAY_SIZE(smpboot); n++) { + /* Replace DSB with the pre-v7 DSB if necessary. */ + if (!arm_feature(&cpu->env, ARM_FEATURE_V7) && + smpboot[n] == DSB_INSN) { + smpboot[n] = CP15_DSB_INSN; + } smpboot[n] = tswap32(smpboot[n]); } rom_add_blob_fixed("smpboot", smpboot, sizeof(smpboot), diff --git a/hw/arm_gic.c b/hw/arm_gic.c index f9e423f152..8d769de4f5 100644 --- a/hw/arm_gic.c +++ b/hw/arm_gic.c @@ -73,7 +73,7 @@ void gic_update(GICState *s) } } level = 0; - if (best_prio <= s->priority_mask[cpu]) { + if (best_prio < s->priority_mask[cpu]) { s->current_pending[cpu] = best_irq; if (best_prio < s->running_priority[cpu]) { DPRINTF("Raised pending IRQ %d\n", best_irq); @@ -374,7 +374,8 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, value = 0xff; for (i = 0; i < 8; i++) { if (value & (1 << i)) { - int mask = (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq); + int mask = + (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq + i); int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK; if (!GIC_TEST_ENABLED(irq + i, cm)) { @@ -417,7 +418,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset, for (i = 0; i < 8; i++) { if (value & (1 << i)) { - GIC_SET_PENDING(irq + i, GIC_TARGET(irq)); + GIC_SET_PENDING(irq + i, GIC_TARGET(irq + i)); } } } else if (offset < 0x300) { diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c index 8369309d21..73ae331807 100644 --- a/hw/arm_gic_common.c +++ b/hw/arm_gic_common.c @@ -127,7 +127,11 @@ static void arm_gic_common_reset(DeviceState *dev) int i; memset(s->irq_state, 0, GIC_MAXIRQ * sizeof(gic_irq_state)); for (i = 0 ; i < s->num_cpu; i++) { - s->priority_mask[i] = 0xf0; + if (s->revision == REV_11MPCORE) { + s->priority_mask[i] = 0xf0; + } else { + s->priority_mask[i] = 0; + } s->current_pending[i] = 1023; s->running_irq[i] = 1023; s->running_priority[i] = 0x100; diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c index f0a2e7b5d2..4963678bf1 100644 --- a/hw/armv7m_nvic.c +++ b/hw/armv7m_nvic.c @@ -455,9 +455,11 @@ static void armv7m_nvic_reset(DeviceState *dev) nc->parent_reset(dev); /* Common GIC reset resets to disabled; the NVIC doesn't have * per-CPU interfaces so mark our non-existent CPU interface - * as enabled by default. + * as enabled by default, and with a priority mask which allows + * all interrupts through. */ s->gic.cpu_enabled[0] = 1; + s->gic.priority_mask[0] = 0x100; /* The NVIC as a whole is always enabled. */ s->gic.enabled = 1; systick_reset(s); diff --git a/hw/boards.h b/hw/boards.h index 813d0e5109..c66fa16a9d 100644 --- a/hw/boards.h +++ b/hw/boards.h @@ -3,6 +3,7 @@ #ifndef HW_BOARDS_H #define HW_BOARDS_H +#include "blockdev.h" #include "qdev.h" typedef struct QEMUMachineInitArgs { @@ -24,7 +25,7 @@ typedef struct QEMUMachine { const char *desc; QEMUMachineInitFunc *init; QEMUMachineResetFunc *reset; - int use_scsi; + BlockInterfaceType block_default_type; int max_cpus; unsigned int no_serial:1, no_parallel:1, diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c index 6d9c080381..839b9ea1d4 100644 --- a/hw/device-hotplug.c +++ b/hw/device-hotplug.c @@ -39,7 +39,7 @@ DriveInfo *add_init_drive(const char *optstr) if (!opts) return NULL; - dinfo = drive_init(opts, current_machine->use_scsi); + dinfo = drive_init(opts, current_machine->block_default_type); if (!dinfo) { qemu_opts_del(opts); return NULL; diff --git a/hw/ds1338.c b/hw/ds1338.c index b576d56438..1aefa3ba04 100644 --- a/hw/ds1338.c +++ b/hw/ds1338.c @@ -17,9 +17,16 @@ */ #define NVRAM_SIZE 64 +/* Flags definitions */ +#define SECONDS_CH 0x80 +#define HOURS_12 0x40 +#define HOURS_PM 0x20 +#define CTRL_OSF 0x20 + typedef struct { I2CSlave i2c; int64_t offset; + uint8_t wday_offset; uint8_t nvram[NVRAM_SIZE]; int32_t ptr; bool addr_byte; @@ -27,12 +34,13 @@ typedef struct { static const VMStateDescription vmstate_ds1338 = { .name = "ds1338", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_I2C_SLAVE(i2c, DS1338State), VMSTATE_INT64(offset, DS1338State), + VMSTATE_UINT8_V(wday_offset, DS1338State, 2), VMSTATE_UINT8_ARRAY(nvram, DS1338State, NVRAM_SIZE), VMSTATE_INT32(ptr, DS1338State), VMSTATE_BOOL(addr_byte, DS1338State), @@ -49,17 +57,22 @@ static void capture_current_time(DS1338State *s) qemu_get_timedate(&now, s->offset); s->nvram[0] = to_bcd(now.tm_sec); s->nvram[1] = to_bcd(now.tm_min); - if (s->nvram[2] & 0x40) { - s->nvram[2] = (to_bcd((now.tm_hour % 12)) + 1) | 0x40; - if (now.tm_hour >= 12) { - s->nvram[2] |= 0x20; + if (s->nvram[2] & HOURS_12) { + int tmp = now.tm_hour; + if (tmp == 0) { + tmp = 24; + } + if (tmp <= 12) { + s->nvram[2] = HOURS_12 | to_bcd(tmp); + } else { + s->nvram[2] = HOURS_12 | HOURS_PM | to_bcd(tmp - 12); } } else { s->nvram[2] = to_bcd(now.tm_hour); } - s->nvram[3] = to_bcd(now.tm_wday) + 1; + s->nvram[3] = (now.tm_wday + s->wday_offset) % 7 + 1; s->nvram[4] = to_bcd(now.tm_mday); - s->nvram[5] = to_bcd(now.tm_mon) + 1; + s->nvram[5] = to_bcd(now.tm_mon + 1); s->nvram[6] = to_bcd(now.tm_year - 100); } @@ -114,7 +127,8 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data) s->addr_byte = false; return 0; } - if (s->ptr < 8) { + if (s->ptr < 7) { + /* Time register. */ struct tm now; qemu_get_timedate(&now, s->offset); switch(s->ptr) { @@ -126,19 +140,27 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data) now.tm_min = from_bcd(data & 0x7f); break; case 2: - if (data & 0x40) { - if (data & 0x20) { - data = from_bcd(data & 0x4f) + 11; - } else { - data = from_bcd(data & 0x1f) - 1; + if (data & HOURS_12) { + int tmp = from_bcd(data & (HOURS_PM - 1)); + if (data & HOURS_PM) { + tmp += 12; + } + if (tmp == 24) { + tmp = 0; } + now.tm_hour = tmp; } else { - data = from_bcd(data); + now.tm_hour = from_bcd(data & (HOURS_12 - 1)); } - now.tm_hour = data; break; case 3: - now.tm_wday = from_bcd(data & 7) - 1; + { + /* The day field is supposed to contain a value in + the range 1-7. Otherwise behavior is undefined. + */ + int user_wday = (data & 7) - 1; + s->wday_offset = (user_wday - now.tm_wday + 7) % 7; + } break; case 4: now.tm_mday = from_bcd(data & 0x3f); @@ -149,11 +171,19 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data) case 6: now.tm_year = from_bcd(data) + 100; break; - case 7: - /* Control register. Currently ignored. */ - break; } s->offset = qemu_timedate_diff(&now); + } else if (s->ptr == 7) { + /* Control register. */ + + /* Ensure bits 2, 3 and 6 will read back as zero. */ + data &= 0xB3; + + /* Attempting to write the OSF flag to logic 1 leaves the + value unchanged. */ + data = (data & ~CTRL_OSF) | (data & s->nvram[s->ptr] & CTRL_OSF); + + s->nvram[s->ptr] = data; } else { s->nvram[s->ptr] = data; } @@ -166,6 +196,18 @@ static int ds1338_init(I2CSlave *i2c) return 0; } +static void ds1338_reset(DeviceState *dev) +{ + DS1338State *s = FROM_I2C_SLAVE(DS1338State, I2C_SLAVE_FROM_QDEV(dev)); + + /* The clock is running and synchronized with the host */ + s->offset = 0; + s->wday_offset = 0; + memset(s->nvram, 0, NVRAM_SIZE); + s->ptr = 0; + s->addr_byte = false; +} + static void ds1338_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -175,6 +217,7 @@ static void ds1338_class_init(ObjectClass *klass, void *data) k->event = ds1338_event; k->recv = ds1338_recv; k->send = ds1338_send; + dc->reset = ds1338_reset; dc->vmsd = &vmstate_ds1338; } diff --git a/hw/exynos4210.c b/hw/exynos4210.c index 00d4db8871..22148cd946 100644 --- a/hw/exynos4210.c +++ b/hw/exynos4210.c @@ -80,12 +80,16 @@ void exynos4210_write_secondary(ARMCPU *cpu, { int n; uint32_t smpboot[] = { - 0xe59f3024, /* ldr r3, External gic_cpu_if */ - 0xe59f2024, /* ldr r2, Internal gic_cpu_if */ - 0xe59f0024, /* ldr r0, startaddr */ + 0xe59f3034, /* ldr r3, External gic_cpu_if */ + 0xe59f2034, /* ldr r2, Internal gic_cpu_if */ + 0xe59f0034, /* ldr r0, startaddr */ 0xe3a01001, /* mov r1, #1 */ 0xe5821000, /* str r1, [r2] */ 0xe5831000, /* str r1, [r3] */ + 0xe3a010ff, /* mov r1, #0xff */ + 0xe5821004, /* str r1, [r2, #4] */ + 0xe5831004, /* str r1, [r3, #4] */ + 0xf57ff04f, /* dsb */ 0xe320f003, /* wfi */ 0xe5901000, /* ldr r1, [r0] */ 0xe1110001, /* tst r1, r1 */ diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c index e79cd6ac01..37dbda92df 100644 --- a/hw/exynos4210_mct.c +++ b/hw/exynos4210_mct.c @@ -568,7 +568,7 @@ static void exynos4210_gfrc_event(void *opaque) /* Reload FRC to reach nearest comparator */ s->g_timer.curr_comp = exynos4210_gcomp_find(s); distance = exynos4210_gcomp_get_distance(s, s->g_timer.curr_comp); - if (distance > MCT_GT_COUNTER_STEP) { + if (distance > MCT_GT_COUNTER_STEP || !distance) { distance = MCT_GT_COUNTER_STEP; } exynos4210_gfrc_set_count(&s->g_timer, distance); diff --git a/hw/highbank.c b/hw/highbank.c index afbb005422..8e35127c8a 100644 --- a/hw/highbank.c +++ b/hw/highbank.c @@ -44,9 +44,12 @@ static void hb_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info) 0xe210000f, /* ands r0, r0, #0x0f */ 0xe3a03040, /* mov r3, #0x40 - jump address is 0x40 + 0x10 * core id */ 0xe0830200, /* add r0, r3, r0, lsl #4 */ - 0xe59f2018, /* ldr r2, privbase */ + 0xe59f2024, /* ldr r2, privbase */ 0xe3a01001, /* mov r1, #1 */ - 0xe5821100, /* str r1, [r2, #256] */ + 0xe5821100, /* str r1, [r2, #256] - set GICC_CTLR.Enable */ + 0xe3a010ff, /* mov r1, #0xff */ + 0xe5821104, /* str r1, [r2, #260] - set GICC_PMR.Priority to 0xff */ + 0xf57ff04f, /* dsb */ 0xe320f003, /* wfi */ 0xe5901000, /* ldr r1, [r0] */ 0xe1110001, /* tst r1, r1 */ @@ -326,7 +329,7 @@ static QEMUMachine highbank_machine = { .name = "highbank", .desc = "Calxeda Highbank (ECX-1000)", .init = highbank_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/ide/core.c b/hw/ide/core.c index adc4aa41b9..0e5bc7fe3b 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -1869,6 +1869,8 @@ static void ide_reset(IDEState *s) s->io_buffer_index = 0; s->cd_sector_size = 0; s->atapi_dma = 0; + s->tray_locked = 0; + s->tray_open = 0; /* ATA DMA state */ s->io_buffer_size = 0; s->req_nb_sectors = 0; diff --git a/hw/kvmvapic.c b/hw/kvmvapic.c index e04c4011d7..60c8fc46aa 100644 --- a/hw/kvmvapic.c +++ b/hw/kvmvapic.c @@ -387,7 +387,6 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i VAPICHandlers *handlers; uint8_t opcode[2]; uint32_t imm32; - TranslationBlock *current_tb; target_ulong current_pc = 0; target_ulong current_cs_base = 0; int current_flags = 0; @@ -399,8 +398,7 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i } if (!kvm_enabled()) { - current_tb = tb_find_pc(env->mem_io_pc); - cpu_restore_state(current_tb, env, env->mem_io_pc); + cpu_restore_state(env, env->mem_io_pc); cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, ¤t_flags); } diff --git a/hw/leon3.c b/hw/leon3.c index 774273828f..ef83dffd85 100644 --- a/hw/leon3.c +++ b/hw/leon3.c @@ -212,7 +212,6 @@ static QEMUMachine leon3_generic_machine = { .name = "leon3_generic", .desc = "Leon-3 generic", .init = leon3_generic_hw_init, - .use_scsi = 0, }; static void leon3_machine_init(void) diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c index 0847427241..ea1416ae2f 100644 --- a/hw/mips_jazz.c +++ b/hw/mips_jazz.c @@ -324,14 +324,14 @@ static QEMUMachine mips_magnum_machine = { .name = "magnum", .desc = "MIPS Magnum", .init = mips_magnum_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine mips_pica61_machine = { .name = "pica61", .desc = "Acer Pica 61", .init = mips_pica61_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void mips_jazz_machine_init(void) diff --git a/hw/openpic.c b/hw/openpic.c index 4791dc6eaf..9c956b9dcc 100644 --- a/hw/openpic.c +++ b/hw/openpic.c @@ -37,6 +37,8 @@ #include "ppc_mac.h" #include "pci/pci.h" #include "openpic.h" +#include "sysbus.h" +#include "pci/msi.h" //#define DEBUG_OPENPIC @@ -46,89 +48,61 @@ #define DPRINTF(fmt, ...) do { } while (0) #endif -#define USE_MPCxxx /* Intel model is broken, for now */ - -#if defined (USE_INTEL_GW80314) -/* Intel GW80314 I/O Companion chip */ - -#define MAX_CPU 4 -#define MAX_IRQ 32 -#define MAX_DBL 4 -#define MAX_MBX 4 -#define MAX_TMR 4 -#define VECTOR_BITS 8 -#define MAX_IPI 4 - -#define VID (0x00000000) - -#elif defined(USE_MPCxxx) - -#define MAX_CPU 15 -#define MAX_IRQ 128 -#define MAX_DBL 0 -#define MAX_MBX 0 +#define MAX_CPU 15 +#define MAX_SRC 256 #define MAX_TMR 4 #define VECTOR_BITS 8 #define MAX_IPI 4 +#define MAX_MSI 8 +#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) #define VID 0x03 /* MPIC version ID */ -#define VENI 0x00000000 /* Vendor ID */ - -enum { - IRQ_IPVP = 0, - IRQ_IDE, -}; -/* OpenPIC */ -#define OPENPIC_MAX_CPU 2 -#define OPENPIC_MAX_IRQ 64 -#define OPENPIC_EXT_IRQ 48 -#define OPENPIC_MAX_TMR MAX_TMR -#define OPENPIC_MAX_IPI MAX_IPI +/* OpenPIC capability flags */ +#define OPENPIC_FLAG_IDE_CRIT (1 << 0) + +/* OpenPIC address map */ +#define OPENPIC_GLB_REG_START 0x0 +#define OPENPIC_GLB_REG_SIZE 0x10F0 +#define OPENPIC_TMR_REG_START 0x10F0 +#define OPENPIC_TMR_REG_SIZE 0x220 +#define OPENPIC_MSI_REG_START 0x1600 +#define OPENPIC_MSI_REG_SIZE 0x200 +#define OPENPIC_SRC_REG_START 0x10000 +#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) +#define OPENPIC_CPU_REG_START 0x20000 +#define OPENPIC_CPU_REG_SIZE 0x100 + ((MAX_CPU - 1) * 0x1000) + +/* Raven */ +#define RAVEN_MAX_CPU 2 +#define RAVEN_MAX_EXT 48 +#define RAVEN_MAX_IRQ 64 +#define RAVEN_MAX_TMR MAX_TMR +#define RAVEN_MAX_IPI MAX_IPI /* Interrupt definitions */ -#define OPENPIC_IRQ_FE (OPENPIC_EXT_IRQ) /* Internal functional IRQ */ -#define OPENPIC_IRQ_ERR (OPENPIC_EXT_IRQ + 1) /* Error IRQ */ -#define OPENPIC_IRQ_TIM0 (OPENPIC_EXT_IRQ + 2) /* First timer IRQ */ -#if OPENPIC_MAX_IPI > 0 -#define OPENPIC_IRQ_IPI0 (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First IPI IRQ */ -#define OPENPIC_IRQ_DBL0 (OPENPIC_IRQ_IPI0 + (OPENPIC_MAX_CPU * OPENPIC_MAX_IPI)) /* First doorbell IRQ */ -#else -#define OPENPIC_IRQ_DBL0 (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First doorbell IRQ */ -#define OPENPIC_IRQ_MBX0 (OPENPIC_IRQ_DBL0 + OPENPIC_MAX_DBL) /* First mailbox IRQ */ -#endif - -/* MPIC */ -#define MPIC_MAX_CPU 1 -#define MPIC_MAX_EXT 12 -#define MPIC_MAX_INT 64 -#define MPIC_MAX_MSG 4 -#define MPIC_MAX_MSI 8 -#define MPIC_MAX_TMR MAX_TMR -#define MPIC_MAX_IPI MAX_IPI -#define MPIC_MAX_IRQ (MPIC_MAX_EXT + MPIC_MAX_INT + MPIC_MAX_TMR + MPIC_MAX_MSG + MPIC_MAX_MSI + (MPIC_MAX_IPI * MPIC_MAX_CPU)) +#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ +#define RAVEN_ERR_IRQ (RAVEN_MAX_EXT + 1) /* Error IRQ */ +#define RAVEN_TMR_IRQ (RAVEN_MAX_EXT + 2) /* First timer IRQ */ +#define RAVEN_IPI_IRQ (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ +/* First doorbell IRQ */ +#define RAVEN_DBL_IRQ (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) + +/* FSL_MPIC_20 */ +#define FSL_MPIC_20_MAX_CPU 1 +#define FSL_MPIC_20_MAX_EXT 12 +#define FSL_MPIC_20_MAX_INT 64 +#define FSL_MPIC_20_MAX_IRQ MAX_IRQ /* Interrupt definitions */ -#define MPIC_EXT_IRQ 0 -#define MPIC_INT_IRQ (MPIC_EXT_IRQ + MPIC_MAX_EXT) -#define MPIC_TMR_IRQ (MPIC_INT_IRQ + MPIC_MAX_INT) -#define MPIC_MSG_IRQ (MPIC_TMR_IRQ + MPIC_MAX_TMR) -#define MPIC_MSI_IRQ (MPIC_MSG_IRQ + MPIC_MAX_MSG) -#define MPIC_IPI_IRQ (MPIC_MSI_IRQ + MPIC_MAX_MSI) - -#define MPIC_GLB_REG_START 0x0 -#define MPIC_GLB_REG_SIZE 0x10F0 -#define MPIC_TMR_REG_START 0x10F0 -#define MPIC_TMR_REG_SIZE 0x220 -#define MPIC_EXT_REG_START 0x10000 -#define MPIC_EXT_REG_SIZE 0x180 -#define MPIC_INT_REG_START 0x10200 -#define MPIC_INT_REG_SIZE 0x800 -#define MPIC_MSG_REG_START 0x11600 -#define MPIC_MSG_REG_SIZE 0x100 -#define MPIC_MSI_REG_START 0x11C00 -#define MPIC_MSI_REG_SIZE 0x100 -#define MPIC_CPU_REG_START 0x20000 -#define MPIC_CPU_REG_SIZE 0x100 + ((MAX_CPU - 1) * 0x1000) +/* IRQs, accessible through the IRQ region */ +#define FSL_MPIC_20_EXT_IRQ 0x00 +#define FSL_MPIC_20_INT_IRQ 0x10 +#define FSL_MPIC_20_MSG_IRQ 0xb0 +#define FSL_MPIC_20_MSI_IRQ 0xe0 +/* These are available through separate regions, but + for simplicity's sake mapped into the same number space */ +#define FSL_MPIC_20_TMR_IRQ 0x100 +#define FSL_MPIC_20_IPI_IRQ 0x104 /* * Block Revision Register1 (BRR1): QEMU does not fully emulate @@ -141,34 +115,42 @@ enum { #define FSL_BRR1_IPMJ (0x00 << 8) /* 8 bit IP major number */ #define FSL_BRR1_IPMN 0x00 /* 8 bit IP minor number */ -enum mpic_ide_bits { - IDR_EP = 31, - IDR_CI0 = 30, - IDR_CI1 = 29, - IDR_P1 = 1, - IDR_P0 = 0, -}; +#define FREP_NIRQ_SHIFT 16 +#define FREP_NCPU_SHIFT 8 +#define FREP_VID_SHIFT 0 -#else -#error "Please select which OpenPic implementation is to be emulated" -#endif +#define VID_REVISION_1_2 2 +#define VID_REVISION_1_3 3 -#define OPENPIC_PAGE_SIZE 4096 +#define VENI_GENERIC 0x00000000 /* Generic Vendor ID */ + +#define IDR_EP_SHIFT 31 +#define IDR_EP_MASK (1 << IDR_EP_SHIFT) +#define IDR_CI0_SHIFT 30 +#define IDR_CI1_SHIFT 29 +#define IDR_P1_SHIFT 1 +#define IDR_P0_SHIFT 0 + +#define MSIIR_OFFSET 0x140 +#define MSIIR_SRS_SHIFT 29 +#define MSIIR_SRS_MASK (0x7 << MSIIR_SRS_SHIFT) +#define MSIIR_IBS_SHIFT 24 +#define MSIIR_IBS_MASK (0x1f << MSIIR_IBS_SHIFT) #define BF_WIDTH(_bits_) \ (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8)) -static inline void set_bit (uint32_t *field, int bit) +static inline void set_bit(uint32_t *field, int bit) { field[bit >> 5] |= 1 << (bit & 0x1F); } -static inline void reset_bit (uint32_t *field, int bit) +static inline void reset_bit(uint32_t *field, int bit) { field[bit >> 5] &= ~(1 << (bit & 0x1F)); } -static inline int test_bit (uint32_t *field, int bit) +static inline int test_bit(uint32_t *field, int bit) { return (field[bit >> 5] & 1 << (bit & 0x1F)) != 0; } @@ -183,41 +165,37 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, static void openpic_cpu_write_internal(void *opaque, hwaddr addr, uint32_t val, int idx); -enum { - IRQ_EXTERNAL = 0x01, - IRQ_INTERNAL = 0x02, - IRQ_TIMER = 0x04, - IRQ_SPECIAL = 0x08, -}; - typedef struct IRQ_queue_t { uint32_t queue[BF_WIDTH(MAX_IRQ)]; int next; int priority; + int pending; /* nr of pending bits in queue */ } IRQ_queue_t; typedef struct IRQ_src_t { uint32_t ipvp; /* IRQ vector/priority register */ uint32_t ide; /* IRQ destination register */ - int type; int last_cpu; int pending; /* TRUE if IRQ is pending */ } IRQ_src_t; -enum IPVP_bits { - IPVP_MASK = 31, - IPVP_ACTIVITY = 30, - IPVP_MODE = 29, - IPVP_POLARITY = 23, - IPVP_SENSE = 22, -}; +#define IPVP_MASK_SHIFT 31 +#define IPVP_MASK_MASK (1 << IPVP_MASK_SHIFT) +#define IPVP_ACTIVITY_SHIFT 30 +#define IPVP_ACTIVITY_MASK (1 << IPVP_ACTIVITY_SHIFT) +#define IPVP_MODE_SHIFT 29 +#define IPVP_MODE_MASK (1 << IPVP_MODE_SHIFT) +#define IPVP_POLARITY_SHIFT 23 +#define IPVP_POLARITY_MASK (1 << IPVP_POLARITY_SHIFT) +#define IPVP_SENSE_SHIFT 22 +#define IPVP_SENSE_MASK (1 << IPVP_SENSE_SHIFT) + #define IPVP_PRIORITY_MASK (0x1F << 16) #define IPVP_PRIORITY(_ipvpr_) ((int)(((_ipvpr_) & IPVP_PRIORITY_MASK) >> 16)) #define IPVP_VECTOR_MASK ((1 << VECTOR_BITS) - 1) #define IPVP_VECTOR(_ipvpr_) ((_ipvpr_) & IPVP_VECTOR_MASK) typedef struct IRQ_dst_t { - uint32_t tfrr; uint32_t pctp; /* CPU current task priority */ uint32_t pcsr; /* CPU sensitivity register */ IRQ_queue_t raised; @@ -225,18 +203,28 @@ typedef struct IRQ_dst_t { qemu_irq *irqs; } IRQ_dst_t; -typedef struct openpic_t { - PCIDevice pci_dev; +typedef struct OpenPICState { + SysBusDevice busdev; MemoryRegion mem; + /* Behavior control */ + uint32_t model; + uint32_t flags; + uint32_t nb_irqs; + uint32_t vid; + uint32_t veni; /* Vendor identification register */ + uint32_t spve_mask; + uint32_t tifr_reset; + uint32_t ipvp_reset; + uint32_t ide_reset; + uint32_t brr1; + /* Sub-regions */ - MemoryRegion sub_io_mem[7]; + MemoryRegion sub_io_mem[5]; /* Global registers */ uint32_t frep; /* Feature reporting register */ uint32_t glbc; /* Global configuration register */ - uint32_t micr; /* MPIC interrupt configuration register */ - uint32_t veni; /* Vendor identification register */ uint32_t pint; /* Processor initialization register */ uint32_t spve; /* Spurious vector register */ uint32_t tifr; /* Timer frequency reporting register */ @@ -244,56 +232,54 @@ typedef struct openpic_t { IRQ_src_t src[MAX_IRQ]; /* Local registers per output pin */ IRQ_dst_t dst[MAX_CPU]; - int nb_cpus; + uint32_t nb_cpus; /* Timer registers */ struct { uint32_t ticc; /* Global timer current count register */ uint32_t tibc; /* Global timer base count register */ } timers[MAX_TMR]; -#if MAX_DBL > 0 - /* Doorbell registers */ - uint32_t dar; /* Doorbell activate register */ + /* Shared MSI registers */ struct { - uint32_t dmr; /* Doorbell messaging register */ - } doorbells[MAX_DBL]; -#endif -#if MAX_MBX > 0 - /* Mailbox registers */ - struct { - uint32_t mbr; /* Mailbox register */ - } mailboxes[MAX_MAILBOXES]; -#endif - /* IRQ out is used when in bypass mode (not implemented) */ - qemu_irq irq_out; - int max_irq; - int irq_ipi0; - int irq_tim0; - void (*reset) (void *); - void (*irq_raise) (struct openpic_t *, int, IRQ_src_t *); -} openpic_t; - -static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ) + uint32_t msir; /* Shared Message Signaled Interrupt Register */ + } msi[MAX_MSI]; + uint32_t max_irq; + uint32_t irq_ipi0; + uint32_t irq_tim0; + uint32_t irq_msi; +} OpenPICState; + +static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src); + +static inline void IRQ_setbit(IRQ_queue_t *q, int n_IRQ) { + q->pending++; set_bit(q->queue, n_IRQ); } -static inline void IRQ_resetbit (IRQ_queue_t *q, int n_IRQ) +static inline void IRQ_resetbit(IRQ_queue_t *q, int n_IRQ) { + q->pending--; reset_bit(q->queue, n_IRQ); } -static inline int IRQ_testbit (IRQ_queue_t *q, int n_IRQ) +static inline int IRQ_testbit(IRQ_queue_t *q, int n_IRQ) { return test_bit(q->queue, n_IRQ); } -static void IRQ_check (openpic_t *opp, IRQ_queue_t *q) +static void IRQ_check(OpenPICState *opp, IRQ_queue_t *q) { int next, i; int priority; next = -1; priority = -1; + + if (!q->pending) { + /* IRQ bitmap is empty */ + goto out; + } + for (i = 0; i < opp->max_irq; i++) { if (IRQ_testbit(q, i)) { DPRINTF("IRQ_check: irq %d set ipvp_pr=%d pr=%d\n", @@ -304,11 +290,13 @@ static void IRQ_check (openpic_t *opp, IRQ_queue_t *q) } } } + +out: q->next = next; q->priority = priority; } -static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q) +static int IRQ_get_next(OpenPICState *opp, IRQ_queue_t *q) { if (q->next == -1) { /* XXX: optimize */ @@ -318,7 +306,7 @@ static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q) return q->next; } -static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ) +static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ) { IRQ_dst_t *dst; IRQ_src_t *src; @@ -339,7 +327,7 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ) __func__, n_IRQ, n_CPU); return; } - set_bit(&src->ipvp, IPVP_ACTIVITY); + src->ipvp |= IPVP_ACTIVITY_MASK; IRQ_setbit(&dst->raised, n_IRQ); if (priority < dst->raised.priority) { /* An higher priority IRQ is already raised */ @@ -356,11 +344,11 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ) return; } DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n", n_CPU, n_IRQ); - opp->irq_raise(opp, n_CPU, src); + openpic_irq_raise(opp, n_CPU, src); } /* update pic state because registers for n_IRQ have changed value */ -static void openpic_update_irq(openpic_t *opp, int n_IRQ) +static void openpic_update_irq(OpenPICState *opp, int n_IRQ) { IRQ_src_t *src; int i; @@ -372,7 +360,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ) DPRINTF("%s: IRQ %d is not pending\n", __func__, n_IRQ); return; } - if (test_bit(&src->ipvp, IPVP_MASK)) { + if (src->ipvp & IPVP_MASK_MASK) { /* Interrupt source is disabled */ DPRINTF("%s: IRQ %d is disabled\n", __func__, n_IRQ); return; @@ -382,7 +370,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ) DPRINTF("%s: IRQ %d has 0 priority\n", __func__, n_IRQ); return; } - if (test_bit(&src->ipvp, IPVP_ACTIVITY)) { + if (src->ipvp & IPVP_ACTIVITY_MASK) { /* IRQ already active */ DPRINTF("%s: IRQ %d is already active\n", __func__, n_IRQ); return; @@ -396,18 +384,19 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ) if (src->ide == (1 << src->last_cpu)) { /* Only one CPU is allowed to receive this IRQ */ IRQ_local_pipe(opp, src->last_cpu, n_IRQ); - } else if (!test_bit(&src->ipvp, IPVP_MODE)) { + } else if (!(src->ipvp & IPVP_MODE_MASK)) { /* Directed delivery mode */ for (i = 0; i < opp->nb_cpus; i++) { - if (test_bit(&src->ide, i)) + if (src->ide & (1 << i)) { IRQ_local_pipe(opp, i, n_IRQ); + } } } else { /* Distributed delivery mode */ for (i = src->last_cpu + 1; i != src->last_cpu; i++) { if (i == opp->nb_cpus) i = 0; - if (test_bit(&src->ide, i)) { + if (src->ide & (1 << i)) { IRQ_local_pipe(opp, i, n_IRQ); src->last_cpu = i; break; @@ -418,17 +407,18 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ) static void openpic_set_irq(void *opaque, int n_IRQ, int level) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; IRQ_src_t *src; src = &opp->src[n_IRQ]; DPRINTF("openpic: set irq %d = %d ipvp=%08x\n", n_IRQ, level, src->ipvp); - if (test_bit(&src->ipvp, IPVP_SENSE)) { + if (src->ipvp & IPVP_SENSE_MASK) { /* level-sensitive irq */ src->pending = level; - if (!level) - reset_bit(&src->ipvp, IPVP_ACTIVITY); + if (!level) { + src->ipvp &= ~IPVP_ACTIVITY_MASK; + } } else { /* edge-sensitive irq */ if (level) @@ -437,24 +427,24 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) openpic_update_irq(opp, n_IRQ); } -static void openpic_reset (void *opaque) +static void openpic_reset(DeviceState *d) { - openpic_t *opp = (openpic_t *)opaque; + OpenPICState *opp = FROM_SYSBUS(typeof (*opp), sysbus_from_qdev(d)); int i; opp->glbc = 0x80000000; /* Initialise controller registers */ - opp->frep = ((OPENPIC_EXT_IRQ - 1) << 16) | ((MAX_CPU - 1) << 8) | VID; - opp->veni = VENI; + opp->frep = ((opp->nb_irqs -1) << FREP_NIRQ_SHIFT) | + ((opp->nb_cpus -1) << FREP_NCPU_SHIFT) | + (opp->vid << FREP_VID_SHIFT); + opp->pint = 0x00000000; - opp->spve = 0x000000FF; - opp->tifr = 0x003F7A00; - /* ? */ - opp->micr = 0x00000000; + opp->spve = -1 & opp->spve_mask; + opp->tifr = opp->tifr_reset; /* Initialise IRQ sources */ for (i = 0; i < opp->max_irq; i++) { - opp->src[i].ipvp = 0xA0000000; - opp->src[i].ide = 0x00000000; + opp->src[i].ipvp = opp->ipvp_reset; + opp->src[i].ide = opp->ide_reset; } /* Initialise IRQ destinations */ for (i = 0; i < MAX_CPU; i++) { @@ -470,34 +460,21 @@ static void openpic_reset (void *opaque) opp->timers[i].ticc = 0x00000000; opp->timers[i].tibc = 0x80000000; } - /* Initialise doorbells */ -#if MAX_DBL > 0 - opp->dar = 0x00000000; - for (i = 0; i < MAX_DBL; i++) { - opp->doorbells[i].dmr = 0x00000000; - } -#endif - /* Initialise mailboxes */ -#if MAX_MBX > 0 - for (i = 0; i < MAX_MBX; i++) { /* ? */ - opp->mailboxes[i].mbr = 0x00000000; - } -#endif /* Go out of RESET state */ opp->glbc = 0x00000000; } -static inline uint32_t read_IRQreg_ide(openpic_t *opp, int n_IRQ) +static inline uint32_t read_IRQreg_ide(OpenPICState *opp, int n_IRQ) { return opp->src[n_IRQ].ide; } -static inline uint32_t read_IRQreg_ipvp(openpic_t *opp, int n_IRQ) +static inline uint32_t read_IRQreg_ipvp(OpenPICState *opp, int n_IRQ) { return opp->src[n_IRQ].ipvp; } -static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val) +static inline void write_IRQreg_ide(OpenPICState *opp, int n_IRQ, uint32_t val) { uint32_t tmp; @@ -507,7 +484,7 @@ static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val) DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide); } -static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val) +static inline void write_IRQreg_ipvp(OpenPICState *opp, int n_IRQ, uint32_t val) { /* NOTE: not fully accurate for special IRQs, but simple and sufficient */ /* ACTIVITY bit is read-only */ @@ -518,87 +495,10 @@ static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val) opp->src[n_IRQ].ipvp); } -#if 0 // Code provision for Intel model -#if MAX_DBL > 0 -static uint32_t read_doorbell_register (openpic_t *opp, - int n_dbl, uint32_t offset) -{ - uint32_t retval; - - switch (offset) { - case DBL_IPVP_OFFSET: - retval = read_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl); - break; - case DBL_IDE_OFFSET: - retval = read_IRQreg_ide(opp, IRQ_DBL0 + n_dbl); - break; - case DBL_DMR_OFFSET: - retval = opp->doorbells[n_dbl].dmr; - break; - } - - return retval; -} - -static void write_doorbell_register (penpic_t *opp, int n_dbl, - uint32_t offset, uint32_t value) -{ - switch (offset) { - case DBL_IVPR_OFFSET: - write_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl, value); - break; - case DBL_IDE_OFFSET: - write_IRQreg_ide(opp, IRQ_DBL0 + n_dbl, value); - break; - case DBL_DMR_OFFSET: - opp->doorbells[n_dbl].dmr = value; - break; - } -} -#endif - -#if MAX_MBX > 0 -static uint32_t read_mailbox_register (openpic_t *opp, - int n_mbx, uint32_t offset) -{ - uint32_t retval; - - switch (offset) { - case MBX_MBR_OFFSET: - retval = opp->mailboxes[n_mbx].mbr; - break; - case MBX_IVPR_OFFSET: - retval = read_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx); - break; - case MBX_DMR_OFFSET: - retval = read_IRQreg_ide(opp, IRQ_MBX0 + n_mbx); - break; - } - - return retval; -} - -static void write_mailbox_register (openpic_t *opp, int n_mbx, - uint32_t address, uint32_t value) -{ - switch (offset) { - case MBX_MBR_OFFSET: - opp->mailboxes[n_mbx].mbr = value; - break; - case MBX_IVPR_OFFSET: - write_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx, value); - break; - case MBX_DMR_OFFSET: - write_IRQreg_ide(opp, IRQ_MBX0 + n_mbx, value); - break; - } -} -#endif -#endif /* 0 : Code provision for Intel model */ - -static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val) +static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val, + unsigned len) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; IRQ_dst_t *dst; int idx; @@ -621,9 +521,9 @@ static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val) case 0x1000: /* FREP */ break; case 0x1020: /* GLBC */ - if (val & 0x80000000 && opp->reset) - opp->reset(opp); - opp->glbc = val & ~0x80000000; + if (val & 0x80000000) { + openpic_reset(&opp->busdev.qdev); + } break; case 0x1080: /* VENI */ break; @@ -652,19 +552,16 @@ static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val) } break; case 0x10E0: /* SPVE */ - opp->spve = val & 0x000000FF; - break; - case 0x10F0: /* TIFR */ - opp->tifr = val; + opp->spve = val & opp->spve_mask; break; default: break; } } -static uint32_t openpic_gbl_read (void *opaque, hwaddr addr) +static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; uint32_t retval; DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); @@ -708,9 +605,6 @@ static uint32_t openpic_gbl_read (void *opaque, hwaddr addr) case 0x10E0: /* SPVE */ retval = opp->spve; break; - case 0x10F0: /* TIFR */ - retval = opp->tifr; - break; default: break; } @@ -719,73 +613,83 @@ static uint32_t openpic_gbl_read (void *opaque, hwaddr addr) return retval; } -static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val) +static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val, + unsigned len) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; int idx; DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val); if (addr & 0xF) return; - addr -= 0x10; - addr &= 0xFFFF; - idx = (addr & 0xFFF0) >> 6; + idx = (addr >> 6) & 0x3; addr = addr & 0x30; - switch (addr) { - case 0x00: /* TICC */ + + if (addr == 0x0) { + /* TIFR (TFRR) */ + opp->tifr = val; + return; + } + switch (addr & 0x30) { + case 0x00: /* TICC (GTCCR) */ break; - case 0x10: /* TIBC */ + case 0x10: /* TIBC (GTBCR) */ if ((opp->timers[idx].ticc & 0x80000000) != 0 && (val & 0x80000000) == 0 && (opp->timers[idx].tibc & 0x80000000) != 0) opp->timers[idx].ticc &= ~0x80000000; opp->timers[idx].tibc = val; break; - case 0x20: /* TIVP */ + case 0x20: /* TIVP (GTIVPR) */ write_IRQreg_ipvp(opp, opp->irq_tim0 + idx, val); break; - case 0x30: /* TIDE */ + case 0x30: /* TIDE (GTIDR) */ write_IRQreg_ide(opp, opp->irq_tim0 + idx, val); break; } } -static uint32_t openpic_timer_read (void *opaque, uint32_t addr) +static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len) { - openpic_t *opp = opaque; - uint32_t retval; + OpenPICState *opp = opaque; + uint32_t retval = -1; int idx; DPRINTF("%s: addr %08x\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; - addr -= 0x10; - addr &= 0xFFFF; - idx = (addr & 0xFFF0) >> 6; - addr = addr & 0x30; - switch (addr) { - case 0x00: /* TICC */ + if (addr & 0xF) { + goto out; + } + idx = (addr >> 6) & 0x3; + if (addr == 0x0) { + /* TIFR (TFRR) */ + retval = opp->tifr; + goto out; + } + switch (addr & 0x30) { + case 0x00: /* TICC (GTCCR) */ retval = opp->timers[idx].ticc; break; - case 0x10: /* TIBC */ + case 0x10: /* TIBC (GTBCR) */ retval = opp->timers[idx].tibc; break; - case 0x20: /* TIPV */ + case 0x20: /* TIPV (TIPV) */ retval = read_IRQreg_ipvp(opp, opp->irq_tim0 + idx); break; - case 0x30: /* TIDE */ + case 0x30: /* TIDE (TIDR) */ retval = read_IRQreg_ide(opp, opp->irq_tim0 + idx); break; } + +out: DPRINTF("%s: => %08x\n", __func__, retval); return retval; } -static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val) +static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val, + unsigned len) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; int idx; DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val); @@ -802,9 +706,9 @@ static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val) } } -static uint32_t openpic_src_read (void *opaque, uint32_t addr) +static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; uint32_t retval; int idx; @@ -826,10 +730,72 @@ static uint32_t openpic_src_read (void *opaque, uint32_t addr) return retval; } +static void openpic_msi_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + OpenPICState *opp = opaque; + int idx = opp->irq_msi; + int srs, ibs; + + DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); + if (addr & 0xF) { + return; + } + + switch (addr) { + case MSIIR_OFFSET: + srs = val >> MSIIR_SRS_SHIFT; + idx += srs; + ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT; + opp->msi[srs].msir |= 1 << ibs; + openpic_set_irq(opp, idx, 1); + break; + default: + /* most registers are read-only, thus ignored */ + break; + } +} + +static uint64_t openpic_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + OpenPICState *opp = opaque; + uint64_t r = 0; + int i, srs; + + DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); + if (addr & 0xF) { + return -1; + } + + srs = addr >> 4; + + switch (addr) { + case 0x00: + case 0x10: + case 0x20: + case 0x30: + case 0x40: + case 0x50: + case 0x60: + case 0x70: /* MSIRs */ + r = opp->msi[srs].msir; + /* Clear on read */ + opp->msi[srs].msir = 0; + break; + case 0x120: /* MSISR */ + for (i = 0; i < MAX_MSI; i++) { + r |= (opp->msi[i].msir ? 1 : 0) << i; + } + break; + } + + return r; +} + static void openpic_cpu_write_internal(void *opaque, hwaddr addr, uint32_t val, int idx) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; IRQ_src_t *src; IRQ_dst_t *dst; int s_IRQ, n_IRQ; @@ -841,7 +807,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, dst = &opp->dst[idx]; addr &= 0xFF0; switch (addr) { -#if MAX_IPI > 0 case 0x40: /* IPIDR */ case 0x50: case 0x60: @@ -853,7 +818,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, openpic_set_irq(opp, opp->irq_ipi0 + idx, 1); openpic_set_irq(opp, opp->irq_ipi0 + idx, 0); break; -#endif case 0x80: /* PCTP */ dst->pctp = val & 0x0000000F; break; @@ -878,7 +842,7 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, IPVP_PRIORITY(src->ipvp) > dst->servicing.priority)) { DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n", idx, n_IRQ); - opp->irq_raise(opp, idx, src); + openpic_irq_raise(opp, idx, src); } break; default: @@ -886,7 +850,8 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr, } } -static void openpic_cpu_write(void *opaque, hwaddr addr, uint32_t val) +static void openpic_cpu_write(void *opaque, hwaddr addr, uint64_t val, + unsigned len) { openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12); } @@ -894,7 +859,7 @@ static void openpic_cpu_write(void *opaque, hwaddr addr, uint32_t val) static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, int idx) { - openpic_t *opp = opaque; + OpenPICState *opp = opaque; IRQ_src_t *src; IRQ_dst_t *dst; uint32_t retval; @@ -908,7 +873,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, addr &= 0xFF0; switch (addr) { case 0x00: /* Block Revision Register1 (BRR1) */ - retval = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN; + retval = opp->brr1; break; case 0x80: /* PCTP */ retval = dst->pctp; @@ -926,13 +891,13 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, retval = IPVP_VECTOR(opp->spve); } else { src = &opp->src[n_IRQ]; - if (!test_bit(&src->ipvp, IPVP_ACTIVITY) || + if (!(src->ipvp & IPVP_ACTIVITY_MASK) || !(IPVP_PRIORITY(src->ipvp) > dst->pctp)) { /* - Spurious level-sensitive IRQ * - Priorities has been changed * and the pending IRQ isn't allowed anymore */ - reset_bit(&src->ipvp, IPVP_ACTIVITY); + src->ipvp &= ~IPVP_ACTIVITY_MASK; retval = IPVP_VECTOR(opp->spve); } else { /* IRQ enter servicing state */ @@ -941,20 +906,20 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, } IRQ_resetbit(&dst->raised, n_IRQ); dst->raised.next = -1; - if (!test_bit(&src->ipvp, IPVP_SENSE)) { + if (!(src->ipvp & IPVP_SENSE_MASK)) { /* edge-sensitive IRQ */ - reset_bit(&src->ipvp, IPVP_ACTIVITY); + src->ipvp &= ~IPVP_ACTIVITY_MASK; src->pending = 0; } if ((n_IRQ >= opp->irq_ipi0) && (n_IRQ < (opp->irq_ipi0 + MAX_IPI))) { src->ide &= ~(1 << idx); - if (src->ide && !test_bit(&src->ipvp, IPVP_SENSE)) { + if (src->ide && !(src->ipvp & IPVP_SENSE_MASK)) { /* trigger on CPUs that didn't know about it yet */ openpic_set_irq(opp, n_IRQ, 1); openpic_set_irq(opp, n_IRQ, 0); /* if all CPUs knew about it, set active bit again */ - set_bit(&src->ipvp, IPVP_ACTIVITY); + src->ipvp |= IPVP_ACTIVITY_MASK; } } } @@ -970,96 +935,109 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, return retval; } -static uint32_t openpic_cpu_read(void *opaque, hwaddr addr) +static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len) { return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12); } -static void openpic_buggy_write (void *opaque, - hwaddr addr, uint32_t val) -{ - printf("Invalid OPENPIC write access !\n"); -} - -static uint32_t openpic_buggy_read (void *opaque, hwaddr addr) -{ - printf("Invalid OPENPIC read access !\n"); - - return -1; -} - -static void openpic_writel (void *opaque, - hwaddr addr, uint32_t val) -{ - openpic_t *opp = opaque; - - addr &= 0x3FFFF; - DPRINTF("%s: offset %08x val: %08x\n", __func__, (int)addr, val); - if (addr < 0x1100) { - /* Global registers */ - openpic_gbl_write(opp, addr, val); - } else if (addr < 0x10000) { - /* Timers registers */ - openpic_timer_write(opp, addr, val); - } else if (addr < 0x20000) { - /* Source registers */ - openpic_src_write(opp, addr, val); - } else { - /* CPU registers */ - openpic_cpu_write(opp, addr, val); - } -} +static const MemoryRegionOps openpic_glb_ops_le = { + .write = openpic_gbl_write, + .read = openpic_gbl_read, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; -static uint32_t openpic_readl (void *opaque,hwaddr addr) -{ - openpic_t *opp = opaque; - uint32_t retval; +static const MemoryRegionOps openpic_glb_ops_be = { + .write = openpic_gbl_write, + .read = openpic_gbl_read, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; - addr &= 0x3FFFF; - DPRINTF("%s: offset %08x\n", __func__, (int)addr); - if (addr < 0x1100) { - /* Global registers */ - retval = openpic_gbl_read(opp, addr); - } else if (addr < 0x10000) { - /* Timers registers */ - retval = openpic_timer_read(opp, addr); - } else if (addr < 0x20000) { - /* Source registers */ - retval = openpic_src_read(opp, addr); - } else { - /* CPU registers */ - retval = openpic_cpu_read(opp, addr); - } +static const MemoryRegionOps openpic_tmr_ops_le = { + .write = openpic_tmr_write, + .read = openpic_tmr_read, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; - return retval; -} +static const MemoryRegionOps openpic_tmr_ops_be = { + .write = openpic_tmr_write, + .read = openpic_tmr_read, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; -static uint64_t openpic_read(void *opaque, hwaddr addr, - unsigned size) -{ - openpic_t *opp = opaque; +static const MemoryRegionOps openpic_cpu_ops_le = { + .write = openpic_cpu_write, + .read = openpic_cpu_read, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; - switch (size) { - case 4: return openpic_readl(opp, addr); - default: return openpic_buggy_read(opp, addr); - } -} +static const MemoryRegionOps openpic_cpu_ops_be = { + .write = openpic_cpu_write, + .read = openpic_cpu_read, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; -static void openpic_write(void *opaque, hwaddr addr, - uint64_t data, unsigned size) -{ - openpic_t *opp = opaque; +static const MemoryRegionOps openpic_src_ops_le = { + .write = openpic_src_write, + .read = openpic_src_read, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; - switch (size) { - case 4: return openpic_writel(opp, addr, data); - default: return openpic_buggy_write(opp, addr, data); - } -} +static const MemoryRegionOps openpic_src_ops_be = { + .write = openpic_src_write, + .read = openpic_src_read, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; -static const MemoryRegionOps openpic_ops = { - .read = openpic_read, - .write = openpic_write, +static const MemoryRegionOps openpic_msi_ops_le = { + .read = openpic_msi_read, + .write = openpic_msi_write, .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps openpic_msi_ops_be = { + .read = openpic_msi_read, + .write = openpic_msi_write, + .endianness = DEVICE_BIG_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, }; static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q) @@ -1075,12 +1053,10 @@ static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q) static void openpic_save(QEMUFile* f, void *opaque) { - openpic_t *opp = (openpic_t *)opaque; + OpenPICState *opp = (OpenPICState *)opaque; unsigned int i; - qemu_put_be32s(f, &opp->frep); qemu_put_be32s(f, &opp->glbc); - qemu_put_be32s(f, &opp->micr); qemu_put_be32s(f, &opp->veni); qemu_put_be32s(f, &opp->pint); qemu_put_be32s(f, &opp->spve); @@ -1089,15 +1065,13 @@ static void openpic_save(QEMUFile* f, void *opaque) for (i = 0; i < opp->max_irq; i++) { qemu_put_be32s(f, &opp->src[i].ipvp); qemu_put_be32s(f, &opp->src[i].ide); - qemu_put_sbe32s(f, &opp->src[i].type); qemu_put_sbe32s(f, &opp->src[i].last_cpu); qemu_put_sbe32s(f, &opp->src[i].pending); } - qemu_put_sbe32s(f, &opp->nb_cpus); + qemu_put_be32s(f, &opp->nb_cpus); for (i = 0; i < opp->nb_cpus; i++) { - qemu_put_be32s(f, &opp->dst[i].tfrr); qemu_put_be32s(f, &opp->dst[i].pctp); qemu_put_be32s(f, &opp->dst[i].pcsr); openpic_save_IRQ_queue(f, &opp->dst[i].raised); @@ -1108,22 +1082,6 @@ static void openpic_save(QEMUFile* f, void *opaque) qemu_put_be32s(f, &opp->timers[i].ticc); qemu_put_be32s(f, &opp->timers[i].tibc); } - -#if MAX_DBL > 0 - qemu_put_be32s(f, &opp->dar); - - for (i = 0; i < MAX_DBL; i++) { - qemu_put_be32s(f, &opp->doorbells[i].dmr); - } -#endif - -#if MAX_MBX > 0 - for (i = 0; i < MAX_MAILBOXES; i++) { - qemu_put_be32s(f, &opp->mailboxes[i].mbr); - } -#endif - - pci_device_save(&opp->pci_dev, f); } static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q) @@ -1139,15 +1097,13 @@ static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q) static int openpic_load(QEMUFile* f, void *opaque, int version_id) { - openpic_t *opp = (openpic_t *)opaque; + OpenPICState *opp = (OpenPICState *)opaque; unsigned int i; if (version_id != 1) return -EINVAL; - qemu_get_be32s(f, &opp->frep); qemu_get_be32s(f, &opp->glbc); - qemu_get_be32s(f, &opp->micr); qemu_get_be32s(f, &opp->veni); qemu_get_be32s(f, &opp->pint); qemu_get_be32s(f, &opp->spve); @@ -1156,15 +1112,13 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id) for (i = 0; i < opp->max_irq; i++) { qemu_get_be32s(f, &opp->src[i].ipvp); qemu_get_be32s(f, &opp->src[i].ide); - qemu_get_sbe32s(f, &opp->src[i].type); qemu_get_sbe32s(f, &opp->src[i].last_cpu); qemu_get_sbe32s(f, &opp->src[i].pending); } - qemu_get_sbe32s(f, &opp->nb_cpus); + qemu_get_be32s(f, &opp->nb_cpus); for (i = 0; i < opp->nb_cpus; i++) { - qemu_get_be32s(f, &opp->dst[i].tfrr); qemu_get_be32s(f, &opp->dst[i].pctp); qemu_get_be32s(f, &opp->dst[i].pcsr); openpic_load_IRQ_queue(f, &opp->dst[i].raised); @@ -1176,535 +1130,156 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id) qemu_get_be32s(f, &opp->timers[i].tibc); } -#if MAX_DBL > 0 - qemu_get_be32s(f, &opp->dar); - - for (i = 0; i < MAX_DBL; i++) { - qemu_get_be32s(f, &opp->doorbells[i].dmr); - } -#endif - -#if MAX_MBX > 0 - for (i = 0; i < MAX_MAILBOXES; i++) { - qemu_get_be32s(f, &opp->mailboxes[i].mbr); - } -#endif - - return pci_device_load(&opp->pci_dev, f); -} - -static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src) -{ - qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]); -} - -qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus, - qemu_irq **irqs, qemu_irq irq_out) -{ - openpic_t *opp; - int i, m; - - /* XXX: for now, only one CPU is supported */ - if (nb_cpus != 1) - return NULL; - opp = g_malloc0(sizeof(openpic_t)); - memory_region_init_io(&opp->mem, &openpic_ops, opp, "openpic", 0x40000); - - // isu_base &= 0xFFFC0000; - opp->nb_cpus = nb_cpus; - opp->max_irq = OPENPIC_MAX_IRQ; - opp->irq_ipi0 = OPENPIC_IRQ_IPI0; - opp->irq_tim0 = OPENPIC_IRQ_TIM0; - /* Set IRQ types */ - for (i = 0; i < OPENPIC_EXT_IRQ; i++) { - opp->src[i].type = IRQ_EXTERNAL; - } - for (; i < OPENPIC_IRQ_TIM0; i++) { - opp->src[i].type = IRQ_SPECIAL; - } -#if MAX_IPI > 0 - m = OPENPIC_IRQ_IPI0; -#else - m = OPENPIC_IRQ_DBL0; -#endif - for (; i < m; i++) { - opp->src[i].type = IRQ_TIMER; - } - for (; i < OPENPIC_MAX_IRQ; i++) { - opp->src[i].type = IRQ_INTERNAL; - } - for (i = 0; i < nb_cpus; i++) - opp->dst[i].irqs = irqs[i]; - opp->irq_out = irq_out; - - register_savevm(&opp->pci_dev.qdev, "openpic", 0, 2, - openpic_save, openpic_load, opp); - qemu_register_reset(openpic_reset, opp); - - opp->irq_raise = openpic_irq_raise; - opp->reset = openpic_reset; - - if (pmem) - *pmem = &opp->mem; - - return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq); -} - -static void mpic_irq_raise(openpic_t *mpp, int n_CPU, IRQ_src_t *src) -{ - int n_ci = IDR_CI0 - n_CPU; - - if(test_bit(&src->ide, n_ci)) { - qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]); - } - else { - qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]); - } + return 0; } -static void mpic_reset (void *opaque) +static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src) { - openpic_t *mpp = (openpic_t *)opaque; - int i; + int n_ci = IDR_CI0_SHIFT - n_CPU; - mpp->glbc = 0x80000000; - /* Initialise controller registers */ - mpp->frep = 0x004f0002 | ((mpp->nb_cpus - 1) << 8); - mpp->veni = VENI; - mpp->pint = 0x00000000; - mpp->spve = 0x0000FFFF; - /* Initialise IRQ sources */ - for (i = 0; i < mpp->max_irq; i++) { - mpp->src[i].ipvp = 0x80800000; - mpp->src[i].ide = 0x00000001; - } - /* Set IDE for IPIs to 0 so we don't get spurious interrupts */ - for (i = mpp->irq_ipi0; i < (mpp->irq_ipi0 + MAX_IPI); i++) { - mpp->src[i].ide = 0; - } - /* Initialise IRQ destinations */ - for (i = 0; i < MAX_CPU; i++) { - mpp->dst[i].pctp = 0x0000000F; - mpp->dst[i].tfrr = 0x00000000; - memset(&mpp->dst[i].raised, 0, sizeof(IRQ_queue_t)); - mpp->dst[i].raised.next = -1; - memset(&mpp->dst[i].servicing, 0, sizeof(IRQ_queue_t)); - mpp->dst[i].servicing.next = -1; - } - /* Initialise timers */ - for (i = 0; i < MAX_TMR; i++) { - mpp->timers[i].ticc = 0x00000000; - mpp->timers[i].tibc = 0x80000000; + if ((opp->flags & OPENPIC_FLAG_IDE_CRIT) && (src->ide & (1 << n_ci))) { + qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]); + } else { + qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]); } - /* Go out of RESET state */ - mpp->glbc = 0x00000000; } -static void mpic_timer_write (void *opaque, hwaddr addr, uint32_t val) -{ - openpic_t *mpp = opaque; - int idx, cpu; - - DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); - if (addr & 0xF) - return; - addr &= 0xFFFF; - cpu = addr >> 12; - idx = (addr >> 6) & 0x3; - switch (addr & 0x30) { - case 0x00: /* gtccr */ - break; - case 0x10: /* gtbcr */ - if ((mpp->timers[idx].ticc & 0x80000000) != 0 && - (val & 0x80000000) == 0 && - (mpp->timers[idx].tibc & 0x80000000) != 0) - mpp->timers[idx].ticc &= ~0x80000000; - mpp->timers[idx].tibc = val; - break; - case 0x20: /* GTIVPR */ - write_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx, val); - break; - case 0x30: /* GTIDR & TFRR */ - if ((addr & 0xF0) == 0xF0) - mpp->dst[cpu].tfrr = val; - else - write_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx, val); - break; - } -} +struct memreg { + const char *name; + MemoryRegionOps const *ops; + bool map; + hwaddr start_addr; + ram_addr_t size; +}; -static uint32_t mpic_timer_read (void *opaque, hwaddr addr) +static int openpic_init(SysBusDevice *dev) { - openpic_t *mpp = opaque; - uint32_t retval; - int idx, cpu; + OpenPICState *opp = FROM_SYSBUS(typeof (*opp), dev); + int i, j; + struct memreg list_le[] = { + {"glb", &openpic_glb_ops_le, true, + OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, + {"tmr", &openpic_tmr_ops_le, true, + OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, + {"msi", &openpic_msi_ops_le, true, + OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE}, + {"src", &openpic_src_ops_le, true, + OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, + {"cpu", &openpic_cpu_ops_le, true, + OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, + }; + struct memreg list_be[] = { + {"glb", &openpic_glb_ops_be, true, + OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, + {"tmr", &openpic_tmr_ops_be, true, + OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, + {"msi", &openpic_msi_ops_be, true, + OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE}, + {"src", &openpic_src_ops_be, true, + OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, + {"cpu", &openpic_cpu_ops_be, true, + OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, + }; + struct memreg *list; - DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; - addr &= 0xFFFF; - cpu = addr >> 12; - idx = (addr >> 6) & 0x3; - switch (addr & 0x30) { - case 0x00: /* gtccr */ - retval = mpp->timers[idx].ticc; - break; - case 0x10: /* gtbcr */ - retval = mpp->timers[idx].tibc; - break; - case 0x20: /* TIPV */ - retval = read_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx); - break; - case 0x30: /* TIDR */ - if ((addr &0xF0) == 0XF0) - retval = mpp->dst[cpu].tfrr; - else - retval = read_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx); + switch (opp->model) { + case OPENPIC_MODEL_FSL_MPIC_20: + default: + opp->flags |= OPENPIC_FLAG_IDE_CRIT; + opp->nb_irqs = 80; + opp->vid = VID_REVISION_1_2; + opp->veni = VENI_GENERIC; + opp->spve_mask = 0xFFFF; + opp->tifr_reset = 0x00000000; + opp->ipvp_reset = 0x80000000; + opp->ide_reset = 0x00000001; + opp->max_irq = FSL_MPIC_20_MAX_IRQ; + opp->irq_ipi0 = FSL_MPIC_20_IPI_IRQ; + opp->irq_tim0 = FSL_MPIC_20_TMR_IRQ; + opp->irq_msi = FSL_MPIC_20_MSI_IRQ; + opp->brr1 = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN; + msi_supported = true; + list = list_be; break; - } - DPRINTF("%s: => %08x\n", __func__, retval); - - return retval; -} - -static void mpic_src_ext_write (void *opaque, hwaddr addr, - uint32_t val) -{ - openpic_t *mpp = opaque; - int idx = MPIC_EXT_IRQ; - - DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); - if (addr & 0xF) - return; - - if (addr < MPIC_EXT_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - write_IRQreg_ide(mpp, idx, val); - } else { - /* EXVP / IFEVP / IEEVP */ - write_IRQreg_ipvp(mpp, idx, val); + case OPENPIC_MODEL_RAVEN: + opp->nb_irqs = RAVEN_MAX_EXT; + opp->vid = VID_REVISION_1_3; + opp->veni = VENI_GENERIC; + opp->spve_mask = 0xFF; + opp->tifr_reset = 0x003F7A00; + opp->ipvp_reset = 0xA0000000; + opp->ide_reset = 0x00000000; + opp->max_irq = RAVEN_MAX_IRQ; + opp->irq_ipi0 = RAVEN_IPI_IRQ; + opp->irq_tim0 = RAVEN_TMR_IRQ; + opp->brr1 = -1; + list = list_le; + /* Don't map MSI region */ + list[2].map = false; + + /* Only UP supported today */ + if (opp->nb_cpus != 1) { + return -EINVAL; } + break; } -} -static uint32_t mpic_src_ext_read (void *opaque, hwaddr addr) -{ - openpic_t *mpp = opaque; - uint32_t retval; - int idx = MPIC_EXT_IRQ; + memory_region_init(&opp->mem, "openpic", 0x40000); - DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; - - if (addr < MPIC_EXT_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - retval = read_IRQreg_ide(mpp, idx); - } else { - /* EXVP / IFEVP / IEEVP */ - retval = read_IRQreg_ipvp(mpp, idx); + for (i = 0; i < ARRAY_SIZE(list_le); i++) { + if (!list[i].map) { + continue; } - DPRINTF("%s: => %08x\n", __func__, retval); - } - - return retval; -} - -static void mpic_src_int_write (void *opaque, hwaddr addr, - uint32_t val) -{ - openpic_t *mpp = opaque; - int idx = MPIC_INT_IRQ; - DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); - if (addr & 0xF) - return; - - if (addr < MPIC_INT_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - write_IRQreg_ide(mpp, idx, val); - } else { - /* EXVP / IFEVP / IEEVP */ - write_IRQreg_ipvp(mpp, idx, val); - } - } -} - -static uint32_t mpic_src_int_read (void *opaque, hwaddr addr) -{ - openpic_t *mpp = opaque; - uint32_t retval; - int idx = MPIC_INT_IRQ; - - DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; + memory_region_init_io(&opp->sub_io_mem[i], list[i].ops, opp, + list[i].name, list[i].size); - if (addr < MPIC_INT_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - retval = read_IRQreg_ide(mpp, idx); - } else { - /* EXVP / IFEVP / IEEVP */ - retval = read_IRQreg_ipvp(mpp, idx); - } - DPRINTF("%s: => %08x\n", __func__, retval); + memory_region_add_subregion(&opp->mem, list[i].start_addr, + &opp->sub_io_mem[i]); } - return retval; -} - -static void mpic_src_msg_write (void *opaque, hwaddr addr, - uint32_t val) -{ - openpic_t *mpp = opaque; - int idx = MPIC_MSG_IRQ; - - DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); - if (addr & 0xF) - return; - - if (addr < MPIC_MSG_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - write_IRQreg_ide(mpp, idx, val); - } else { - /* EXVP / IFEVP / IEEVP */ - write_IRQreg_ipvp(mpp, idx, val); + for (i = 0; i < opp->nb_cpus; i++) { + opp->dst[i].irqs = g_new(qemu_irq, OPENPIC_OUTPUT_NB); + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_init_irq(dev, &opp->dst[i].irqs[j]); } } -} -static uint32_t mpic_src_msg_read (void *opaque, hwaddr addr) -{ - openpic_t *mpp = opaque; - uint32_t retval; - int idx = MPIC_MSG_IRQ; - - DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; + register_savevm(&opp->busdev.qdev, "openpic", 0, 2, + openpic_save, openpic_load, opp); - if (addr < MPIC_MSG_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - retval = read_IRQreg_ide(mpp, idx); - } else { - /* EXVP / IFEVP / IEEVP */ - retval = read_IRQreg_ipvp(mpp, idx); - } - DPRINTF("%s: => %08x\n", __func__, retval); - } + sysbus_init_mmio(dev, &opp->mem); + qdev_init_gpio_in(&dev->qdev, openpic_set_irq, opp->max_irq); - return retval; + return 0; } -static void mpic_src_msi_write (void *opaque, hwaddr addr, - uint32_t val) -{ - openpic_t *mpp = opaque; - int idx = MPIC_MSI_IRQ; - - DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val); - if (addr & 0xF) - return; +static Property openpic_properties[] = { + DEFINE_PROP_UINT32("model", OpenPICState, model, OPENPIC_MODEL_FSL_MPIC_20), + DEFINE_PROP_UINT32("nb_cpus", OpenPICState, nb_cpus, 1), + DEFINE_PROP_END_OF_LIST(), +}; - if (addr < MPIC_MSI_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - write_IRQreg_ide(mpp, idx, val); - } else { - /* EXVP / IFEVP / IEEVP */ - write_IRQreg_ipvp(mpp, idx, val); - } - } -} -static uint32_t mpic_src_msi_read (void *opaque, hwaddr addr) +static void openpic_class_init(ObjectClass *klass, void *data) { - openpic_t *mpp = opaque; - uint32_t retval; - int idx = MPIC_MSI_IRQ; + DeviceClass *dc = DEVICE_CLASS(klass); + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); - DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr); - retval = 0xFFFFFFFF; - if (addr & 0xF) - return retval; - - if (addr < MPIC_MSI_REG_SIZE) { - idx += (addr & 0xFFF0) >> 5; - if (addr & 0x10) { - /* EXDE / IFEDE / IEEDE */ - retval = read_IRQreg_ide(mpp, idx); - } else { - /* EXVP / IFEVP / IEEVP */ - retval = read_IRQreg_ipvp(mpp, idx); - } - DPRINTF("%s: => %08x\n", __func__, retval); - } - - return retval; + k->init = openpic_init; + dc->props = openpic_properties; + dc->reset = openpic_reset; } -static const MemoryRegionOps mpic_glb_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - openpic_gbl_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - openpic_gbl_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, +static TypeInfo openpic_info = { + .name = "openpic", + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(OpenPICState), + .class_init = openpic_class_init, }; -static const MemoryRegionOps mpic_tmr_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - mpic_timer_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - mpic_timer_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static const MemoryRegionOps mpic_cpu_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - openpic_cpu_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - openpic_cpu_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static const MemoryRegionOps mpic_ext_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - mpic_src_ext_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - mpic_src_ext_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static const MemoryRegionOps mpic_int_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - mpic_src_int_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - mpic_src_int_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static const MemoryRegionOps mpic_msg_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - mpic_src_msg_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - mpic_src_msg_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -static const MemoryRegionOps mpic_msi_ops = { - .old_mmio = { - .write = { openpic_buggy_write, - openpic_buggy_write, - mpic_src_msi_write, - }, - .read = { openpic_buggy_read, - openpic_buggy_read, - mpic_src_msi_read, - }, - }, - .endianness = DEVICE_BIG_ENDIAN, -}; - -qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base, - int nb_cpus, qemu_irq **irqs, qemu_irq irq_out) +static void openpic_register_types(void) { - openpic_t *mpp; - int i; - struct { - const char *name; - MemoryRegionOps const *ops; - hwaddr start_addr; - ram_addr_t size; - } const list[] = { - {"glb", &mpic_glb_ops, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE}, - {"tmr", &mpic_tmr_ops, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE}, - {"ext", &mpic_ext_ops, MPIC_EXT_REG_START, MPIC_EXT_REG_SIZE}, - {"int", &mpic_int_ops, MPIC_INT_REG_START, MPIC_INT_REG_SIZE}, - {"msg", &mpic_msg_ops, MPIC_MSG_REG_START, MPIC_MSG_REG_SIZE}, - {"msi", &mpic_msi_ops, MPIC_MSI_REG_START, MPIC_MSI_REG_SIZE}, - {"cpu", &mpic_cpu_ops, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE}, - }; - - mpp = g_malloc0(sizeof(openpic_t)); - - memory_region_init(&mpp->mem, "mpic", 0x40000); - memory_region_add_subregion(address_space, base, &mpp->mem); - - for (i = 0; i < sizeof(list)/sizeof(list[0]); i++) { - - memory_region_init_io(&mpp->sub_io_mem[i], list[i].ops, mpp, - list[i].name, list[i].size); - - memory_region_add_subregion(&mpp->mem, list[i].start_addr, - &mpp->sub_io_mem[i]); - } - - mpp->nb_cpus = nb_cpus; - mpp->max_irq = MPIC_MAX_IRQ; - mpp->irq_ipi0 = MPIC_IPI_IRQ; - mpp->irq_tim0 = MPIC_TMR_IRQ; - - for (i = 0; i < nb_cpus; i++) - mpp->dst[i].irqs = irqs[i]; - mpp->irq_out = irq_out; - - mpp->irq_raise = mpic_irq_raise; - mpp->reset = mpic_reset; - - register_savevm(NULL, "mpic", 0, 2, openpic_save, openpic_load, mpp); - qemu_register_reset(mpic_reset, mpp); - - return qemu_allocate_irqs(openpic_set_irq, mpp, mpp->max_irq); + type_register_static(&openpic_info); } + +type_init(openpic_register_types) diff --git a/hw/openpic.h b/hw/openpic.h index f50a1e42bd..e226d7b563 100644 --- a/hw/openpic.h +++ b/hw/openpic.h @@ -11,8 +11,7 @@ enum { OPENPIC_OUTPUT_NB, }; -qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus, - qemu_irq **irqs, qemu_irq irq_out); -qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base, - int nb_cpus, qemu_irq **irqs, qemu_irq irq_out); +#define OPENPIC_MODEL_RAVEN 0 +#define OPENPIC_MODEL_FSL_MPIC_20 1 + #endif /* __OPENPIC_H__ */ diff --git a/hw/pc_sysfw.c b/hw/pc_sysfw.c index 40bced2322..d7ea3a5595 100644 --- a/hw/pc_sysfw.c +++ b/hw/pc_sysfw.c @@ -98,7 +98,7 @@ static void pc_fw_add_pflash_drv(void) return; } - if (!drive_init(opts, machine->use_scsi)) { + if (!drive_init(opts, machine->block_default_type)) { qemu_opts_del(opts); } } diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs index aa7a0e84b5..fe965fe2f6 100644 --- a/hw/pci/Makefile.objs +++ b/hw/pci/Makefile.objs @@ -5,3 +5,5 @@ common-obj-$(CONFIG_PCI) += slotid_cap.o common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o common-obj-$(CONFIG_NO_PCI) += pci-stub.o + +extra-obj-y += pci-stub.o diff --git a/hw/pci/msix.c b/hw/pci/msix.c index 917327bfe6..a6a401e286 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -180,8 +180,7 @@ static void msix_table_mmio_write(void *opaque, hwaddr addr, static const MemoryRegionOps msix_table_mmio_ops = { .read = msix_table_mmio_read, .write = msix_table_mmio_write, - /* TODO: MSIX should be LITTLE_ENDIAN. */ - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, .max_access_size = 4, @@ -198,8 +197,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, static const MemoryRegionOps msix_pba_mmio_ops = { .read = msix_pba_mmio_read, - /* TODO: MSIX should be LITTLE_ENDIAN. */ - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_LITTLE_ENDIAN, .valid = { .min_access_size = 4, .max_access_size = 4, diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index cb7cf8fba5..afdcc0e531 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -11,7 +11,7 @@ obj-y += ppc_newworld.o obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o obj-$(CONFIG_PSERIES) += spapr_pci.o pci/pci-hotplug.o spapr_iommu.o -obj-$(CONFIG_PSERIES) += spapr_events.o +obj-$(CONFIG_PSERIES) += spapr_events.o spapr_nvram.o # PowerPC 4xx boards obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-y += ppc440_bamboo.o diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h new file mode 100644 index 0000000000..f20f51bcd2 --- /dev/null +++ b/hw/ppc/e500-ccsr.h @@ -0,0 +1,17 @@ +#ifndef E500_CCSR_H +#define E500_CCSR_H + +#include "../sysbus.h" + +typedef struct PPCE500CCSRState { + /*< private >*/ + SysBusDevice parent; + /*< public >*/ + + MemoryRegion ccsr_space; +} PPCE500CCSRState; + +#define TYPE_CCSR "e500-ccsr" +#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR) + +#endif /* E500_CCSR_H */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c index f77c488af7..8fab508c07 100644 --- a/hw/ppc/e500.c +++ b/hw/ppc/e500.c @@ -17,6 +17,7 @@ #include "config.h" #include "qemu-common.h" #include "e500.h" +#include "e500-ccsr.h" #include "net.h" #include "hw/hw.h" #include "hw/serial.h" @@ -33,6 +34,7 @@ #include "hw/sysbus.h" #include "exec-memory.h" #include "host-utils.h" +#include "hw/ppce500_pci.h" #define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" #define UIMAGE_LOAD_BASE 0 @@ -46,13 +48,16 @@ /* TODO: parameterize */ #define MPC8544_CCSRBAR_BASE 0xE0000000ULL #define MPC8544_CCSRBAR_SIZE 0x00100000ULL -#define MPC8544_MPIC_REGS_BASE (MPC8544_CCSRBAR_BASE + 0x40000ULL) -#define MPC8544_SERIAL0_REGS_BASE (MPC8544_CCSRBAR_BASE + 0x4500ULL) -#define MPC8544_SERIAL1_REGS_BASE (MPC8544_CCSRBAR_BASE + 0x4600ULL) -#define MPC8544_PCI_REGS_BASE (MPC8544_CCSRBAR_BASE + 0x8000ULL) +#define MPC8544_MPIC_REGS_OFFSET 0x40000ULL +#define MPC8544_MSI_REGS_OFFSET 0x41600ULL +#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL +#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL +#define MPC8544_PCI_REGS_OFFSET 0x8000ULL +#define MPC8544_PCI_REGS_BASE (MPC8544_CCSRBAR_BASE + \ + MPC8544_PCI_REGS_OFFSET) #define MPC8544_PCI_REGS_SIZE 0x1000ULL #define MPC8544_PCI_IO 0xE1000000ULL -#define MPC8544_UTIL_BASE (MPC8544_CCSRBAR_BASE + 0xe0000ULL) +#define MPC8544_UTIL_OFFSET 0xe0000ULL #define MPC8544_SPIN_BASE 0xEF000000ULL struct boot_info @@ -62,25 +67,35 @@ struct boot_info uint32_t entry; }; -static void pci_map_create(void *fdt, uint32_t *pci_map, uint32_t mpic) +static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot, + int nr_slots, int *len) { - int i; - const uint32_t tmp[] = { - /* IDSEL 0x11 J17 Slot 1 */ - 0x8800, 0x0, 0x0, 0x1, mpic, 0x2, 0x1, - 0x8800, 0x0, 0x0, 0x2, mpic, 0x3, 0x1, - 0x8800, 0x0, 0x0, 0x3, mpic, 0x4, 0x1, - 0x8800, 0x0, 0x0, 0x4, mpic, 0x1, 0x1, - - /* IDSEL 0x12 J16 Slot 2 */ - 0x9000, 0x0, 0x0, 0x1, mpic, 0x3, 0x1, - 0x9000, 0x0, 0x0, 0x2, mpic, 0x4, 0x1, - 0x9000, 0x0, 0x0, 0x3, mpic, 0x2, 0x1, - 0x9000, 0x0, 0x0, 0x4, mpic, 0x1, 0x1, - }; - for (i = 0; i < (7 * 8); i++) { - pci_map[i] = cpu_to_be32(tmp[i]); + int i = 0; + int slot; + int pci_irq; + int host_irq; + int last_slot = first_slot + nr_slots; + uint32_t *pci_map; + + *len = nr_slots * 4 * 7 * sizeof(uint32_t); + pci_map = g_malloc(*len); + + for (slot = first_slot; slot < last_slot; slot++) { + for (pci_irq = 0; pci_irq < 4; pci_irq++) { + pci_map[i++] = cpu_to_be32(slot << 11); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(pci_irq + 1); + pci_map[i++] = cpu_to_be32(mpic); + host_irq = ppce500_pci_map_irq_slot(slot, pci_irq); + pci_map[i++] = cpu_to_be32(host_irq + 1); + pci_map[i++] = cpu_to_be32(0x1); + } } + + assert((i * sizeof(uint32_t)) == *len); + + return pci_map; } static void dt_serial_create(void *fdt, unsigned long long offset, @@ -124,9 +139,12 @@ static int ppce500_load_device_tree(CPUPPCState *env, char soc[128]; char mpic[128]; uint32_t mpic_ph; + uint32_t msi_ph; char gutil[128]; char pci[128]; - uint32_t pci_map[7 * 8]; + char msi[128]; + uint32_t *pci_map = NULL; + int len; uint32_t pci_ranges[14] = { 0x2000000, 0x0, 0xc0000000, @@ -267,13 +285,12 @@ static int ppce500_load_device_tree(CPUPPCState *env, /* XXX should contain a reasonable value */ qemu_devtree_setprop_cell(fdt, soc, "bus-frequency", 0); - snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc, - MPC8544_MPIC_REGS_BASE - MPC8544_CCSRBAR_BASE); + snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET); qemu_devtree_add_subnode(fdt, mpic); qemu_devtree_setprop_string(fdt, mpic, "device_type", "open-pic"); qemu_devtree_setprop_string(fdt, mpic, "compatible", "chrp,open-pic"); - qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_BASE - - MPC8544_CCSRBAR_BASE, 0x40000); + qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET, + 0x40000); qemu_devtree_setprop_cell(fdt, mpic, "#address-cells", 0); qemu_devtree_setprop_cell(fdt, mpic, "#interrupt-cells", 2); mpic_ph = qemu_devtree_alloc_phandle(fdt); @@ -286,19 +303,37 @@ static int ppce500_load_device_tree(CPUPPCState *env, * device it finds in the dt as serial output device. And we generate * devices in reverse order to the dt. */ - dt_serial_create(fdt, MPC8544_SERIAL1_REGS_BASE - MPC8544_CCSRBAR_BASE, + dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, soc, mpic, "serial1", 1, false); - dt_serial_create(fdt, MPC8544_SERIAL0_REGS_BASE - MPC8544_CCSRBAR_BASE, + dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, soc, mpic, "serial0", 0, true); snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc, - MPC8544_UTIL_BASE - MPC8544_CCSRBAR_BASE); + MPC8544_UTIL_OFFSET); qemu_devtree_add_subnode(fdt, gutil); qemu_devtree_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts"); - qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_BASE - - MPC8544_CCSRBAR_BASE, 0x1000); + qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000); qemu_devtree_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0); + snprintf(msi, sizeof(msi), "/%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET); + qemu_devtree_add_subnode(fdt, msi); + qemu_devtree_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi"); + qemu_devtree_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200); + msi_ph = qemu_devtree_alloc_phandle(fdt); + qemu_devtree_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100); + qemu_devtree_setprop_phandle(fdt, msi, "interrupt-parent", mpic); + qemu_devtree_setprop_cells(fdt, msi, "interrupts", + 0xe0, 0x0, + 0xe1, 0x0, + 0xe2, 0x0, + 0xe3, 0x0, + 0xe4, 0x0, + 0xe5, 0x0, + 0xe6, 0x0, + 0xe7, 0x0); + qemu_devtree_setprop_cell(fdt, msi, "phandle", msi_ph); + qemu_devtree_setprop_cell(fdt, msi, "linux,phandle", msi_ph); + snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE); qemu_devtree_add_subnode(fdt, pci); qemu_devtree_setprop_cell(fdt, pci, "cell-index", 0); @@ -306,14 +341,17 @@ static int ppce500_load_device_tree(CPUPPCState *env, qemu_devtree_setprop_string(fdt, pci, "device_type", "pci"); qemu_devtree_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0, 0x0, 0x7); - pci_map_create(fdt, pci_map, qemu_devtree_get_phandle(fdt, mpic)); - qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, sizeof(pci_map)); + pci_map = pci_map_create(fdt, qemu_devtree_get_phandle(fdt, mpic), + params->pci_first_slot, params->pci_nr_slots, + &len); + qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, len); qemu_devtree_setprop_phandle(fdt, pci, "interrupt-parent", mpic); qemu_devtree_setprop_cells(fdt, pci, "interrupts", 24, 2); qemu_devtree_setprop_cells(fdt, pci, "bus-range", 0, 255); for (i = 0; i < 14; i++) { pci_ranges[i] = cpu_to_be32(pci_ranges[i]); } + qemu_devtree_setprop_cell(fdt, pci, "fsl,msi", msi_ph); qemu_devtree_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges)); qemu_devtree_setprop_cells(fdt, pci, "reg", MPC8544_PCI_REGS_BASE >> 32, MPC8544_PCI_REGS_BASE, 0, 0x1000); @@ -340,6 +378,7 @@ done: ret = fdt_size; out: + g_free(pci_map); return ret; } @@ -417,11 +456,14 @@ void ppce500_init(PPCE500Params *params) target_ulong dt_base = 0; target_ulong initrd_base = 0; target_long initrd_size=0; - int i=0; + int i = 0, j, k; unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; qemu_irq **irqs, *mpic; DeviceState *dev; CPUPPCState *firstenv = NULL; + MemoryRegion *ccsr_addr_space; + SysBusDevice *s; + PPCE500CCSRState *ccsr; /* Setup CPUs */ if (params->cpu_model == NULL) { @@ -450,7 +492,8 @@ void ppce500_init(PPCE500Params *params) irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; env->spr[SPR_BOOKE_PIR] = env->cpu_index = i; - env->mpic_cpu_base = MPC8544_MPIC_REGS_BASE + 0x20000; + env->mpic_cpu_base = MPC8544_CCSRBAR_BASE + + MPC8544_MPIC_REGS_OFFSET + 0x20000; ppc_booke_timers_init(env, 400000000, PPC_TIMER_E500); @@ -477,35 +520,69 @@ void ppce500_init(PPCE500Params *params) vmstate_register_ram_global(ram); memory_region_add_subregion(address_space_mem, 0, ram); + dev = qdev_create(NULL, "e500-ccsr"); + object_property_add_child(qdev_get_machine(), "e500-ccsr", + OBJECT(dev), NULL); + qdev_init_nofail(dev); + ccsr = CCSR(dev); + ccsr_addr_space = &ccsr->ccsr_space; + memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE, + ccsr_addr_space); + /* MPIC */ - mpic = mpic_init(address_space_mem, MPC8544_MPIC_REGS_BASE, - smp_cpus, irqs, NULL); + mpic = g_new(qemu_irq, 256); + dev = qdev_create(NULL, "openpic"); + qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus); + qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_FSL_MPIC_20); + qdev_init_nofail(dev); + s = sysbus_from_qdev(dev); + + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, irqs[i][j]); + } + } - if (!mpic) { - cpu_abort(env, "MPIC failed to initialize\n"); + for (i = 0; i < 256; i++) { + mpic[i] = qdev_get_gpio_in(dev, i); } + memory_region_add_subregion(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET, + s->mmio[0].memory); + /* Serial */ if (serial_hds[0]) { - serial_mm_init(address_space_mem, MPC8544_SERIAL0_REGS_BASE, - 0, mpic[12+26], 399193, + serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET, + 0, mpic[42], 399193, serial_hds[0], DEVICE_BIG_ENDIAN); } if (serial_hds[1]) { - serial_mm_init(address_space_mem, MPC8544_SERIAL1_REGS_BASE, - 0, mpic[12+26], 399193, + serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET, + 0, mpic[42], 399193, serial_hds[1], DEVICE_BIG_ENDIAN); } /* General Utility device */ - sysbus_create_simple("mpc8544-guts", MPC8544_UTIL_BASE, NULL); + dev = qdev_create(NULL, "mpc8544-guts"); + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET, + sysbus_mmio_get_region(s, 0)); /* PCI */ - dev = sysbus_create_varargs("e500-pcihost", MPC8544_PCI_REGS_BASE, - mpic[pci_irq_nrs[0]], mpic[pci_irq_nrs[1]], - mpic[pci_irq_nrs[2]], mpic[pci_irq_nrs[3]], - NULL); + dev = qdev_create(NULL, "e500-pcihost"); + qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot); + qdev_init_nofail(dev); + s = SYS_BUS_DEVICE(dev); + sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]); + sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]); + sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]); + sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]); + memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); if (!pci_bus) printf("couldn't create PCI controller!\n"); @@ -578,3 +655,33 @@ void ppce500_init(PPCE500Params *params) kvmppc_init(); } } + +static int e500_ccsr_initfn(SysBusDevice *dev) +{ + PPCE500CCSRState *ccsr; + + ccsr = CCSR(dev); + memory_region_init(&ccsr->ccsr_space, "e500-ccsr", + MPC8544_CCSRBAR_SIZE); + return 0; +} + +static void e500_ccsr_class_init(ObjectClass *klass, void *data) +{ + SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); + k->init = e500_ccsr_initfn; +} + +static const TypeInfo e500_ccsr_info = { + .name = TYPE_CCSR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PPCE500CCSRState), + .class_init = e500_ccsr_class_init, +}; + +static void e500_register_types(void) +{ + type_register_static(&e500_ccsr_info); +} + +type_init(e500_register_types) diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h index 7ae87f4e21..f5ff27385b 100644 --- a/hw/ppc/e500.h +++ b/hw/ppc/e500.h @@ -9,6 +9,8 @@ typedef struct PPCE500Params { const char *kernel_cmdline; const char *initrd_filename; const char *cpu_model; + int pci_first_slot; + int pci_nr_slots; /* e500-specific params */ diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c index 4cfb94061a..2992bd9794 100644 --- a/hw/ppc/e500plat.c +++ b/hw/ppc/e500plat.c @@ -14,6 +14,7 @@ #include "e500.h" #include "../boards.h" #include "device_tree.h" +#include "hw/pci.h" static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt) { @@ -40,6 +41,8 @@ static void e500plat_init(QEMUMachineInitArgs *args) .kernel_cmdline = kernel_cmdline, .initrd_filename = initrd_filename, .cpu_model = cpu_model, + .pci_first_slot = 0x1, + .pci_nr_slots = PCI_SLOT_MAX - 1, .fixup_devtree = e500plat_fixup_devtree, }; diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c index e651661941..7e1761d20c 100644 --- a/hw/ppc/mpc8544ds.c +++ b/hw/ppc/mpc8544ds.c @@ -40,6 +40,8 @@ static void mpc8544ds_init(QEMUMachineInitArgs *args) .kernel_cmdline = kernel_cmdline, .initrd_filename = initrd_filename, .cpu_model = cpu_model, + .pci_first_slot = 0x11, + .pci_nr_slots = 2, .fixup_devtree = mpc8544ds_fixup_devtree, }; diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c index c1ff9d7c31..2bf3094e9f 100644 --- a/hw/ppc_newworld.c +++ b/hw/ppc_newworld.c @@ -67,6 +67,7 @@ #include "hw/usb.h" #include "blockdev.h" #include "exec-memory.h" +#include "sysbus.h" #define MAX_IDE_BUS 2 #define CFG_ADDR 0xf0000510 @@ -141,7 +142,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args) char *filename; qemu_irq *pic, **openpic_irqs; MemoryRegion *unin_memory = g_new(MemoryRegion, 1); - int linux_boot, i; + int linux_boot, i, j, k; MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1); hwaddr kernel_base, initrd_base, cmdline_base = 0; long kernel_size, initrd_size; @@ -156,6 +157,8 @@ static void ppc_core99_init(QEMUMachineInitArgs *args) void *fw_cfg; void *dbdma; int machine_arch; + SysBusDevice *s; + DeviceState *dev; linux_boot = (kernel_filename != NULL); @@ -320,7 +323,25 @@ static void ppc_core99_init(QEMUMachineInitArgs *args) exit(1); } } - pic = openpic_init(&pic_mem, smp_cpus, openpic_irqs, NULL); + + pic = g_new(qemu_irq, 64); + + dev = qdev_create(NULL, "openpic"); + qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_RAVEN); + qdev_init_nofail(dev); + s = sysbus_from_qdev(dev); + pic_mem = s->mmio[0].memory; + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, openpic_irqs[i][j]); + } + } + + for (i = 0; i < 64; i++) { + pic[i] = qdev_get_gpio_in(dev, i); + } + if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) { /* 970 gets a U3 bus */ pci_bus = pci_pmac_u3_init(pic, get_system_memory(), get_system_io()); diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c index 39022aada0..9bffbb9f87 100644 --- a/hw/ppce500_pci.c +++ b/hw/ppce500_pci.c @@ -15,9 +15,11 @@ */ #include "hw.h" +#include "hw/ppc/e500-ccsr.h" #include "pci/pci.h" #include "pci/pci_host.h" #include "bswap.h" +#include "ppce500_pci.h" #ifdef DEBUG_PCI #define pci_debug(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__) @@ -86,12 +88,26 @@ struct PPCE500PCIState { struct pci_inbound pib[PPCE500_PCI_NR_PIBS]; uint32_t gasket_time; qemu_irq irq[4]; + uint32_t first_slot; /* mmio maps */ MemoryRegion container; MemoryRegion iomem; MemoryRegion pio; }; +#define TYPE_PPC_E500_PCI_BRIDGE "e500-host-bridge" +#define PPC_E500_PCI_BRIDGE(obj) \ + OBJECT_CHECK(PPCE500PCIBridgeState, (obj), TYPE_PPC_E500_PCI_BRIDGE) + +struct PPCE500PCIBridgeState { + /*< private >*/ + PCIDevice parent; + /*< public >*/ + + MemoryRegion bar0; +}; + +typedef struct PPCE500PCIBridgeState PPCE500PCIBridgeState; typedef struct PPCE500PCIState PPCE500PCIState; static uint64_t pci_reg_read4(void *opaque, hwaddr addr, @@ -238,17 +254,10 @@ static const MemoryRegionOps e500_pci_reg_ops = { static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) { - int devno = pci_dev->devfn >> 3, ret = 0; + int devno = pci_dev->devfn >> 3; + int ret; - switch (devno) { - /* Two PCI slot */ - case 0x11: - case 0x12: - ret = (irq_num + devno - 0x10) % 4; - break; - default: - printf("Error:%s:unknown dev number\n", __func__); - } + ret = ppce500_pci_map_irq_slot(devno, irq_num); pci_debug("%s: devfn %x irq %d -> %d devno:%x\n", __func__, pci_dev->devfn, irq_num, ret, devno); @@ -310,6 +319,24 @@ static const VMStateDescription vmstate_ppce500_pci = { #include "exec-memory.h" +static int e500_pcihost_bridge_initfn(PCIDevice *d) +{ + PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d); + PPCE500CCSRState *ccsr = CCSR(container_get(qdev_get_machine(), + "/e500-ccsr")); + + pci_config_set_class(d->config, PCI_CLASS_BRIDGE_PCI); + d->config[PCI_HEADER_TYPE] = + (d->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) | + PCI_HEADER_TYPE_BRIDGE; + + memory_region_init_alias(&b->bar0, "e500-pci-bar0", &ccsr->ccsr_space, + 0, int128_get64(ccsr->ccsr_space.size)); + pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0); + + return 0; +} + static int e500_pcihost_initfn(SysBusDevice *dev) { PCIHostState *h; @@ -329,7 +356,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev) b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq, mpc85xx_pci_map_irq, s->irq, address_space_mem, - &s->pio, PCI_DEVFN(0x11, 0), 4); + &s->pio, PCI_DEVFN(s->first_slot, 0), 4); h->bus = b; pci_create_simple(b, 0, "e500-host-bridge"); @@ -355,6 +382,7 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + k->init = e500_pcihost_bridge_initfn; k->vendor_id = PCI_VENDOR_ID_FREESCALE; k->device_id = PCI_DEVICE_ID_MPC8533E; k->class_id = PCI_CLASS_PROCESSOR_POWERPC; @@ -364,16 +392,22 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data) static const TypeInfo e500_host_bridge_info = { .name = "e500-host-bridge", .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PCIDevice), + .instance_size = sizeof(PPCE500PCIBridgeState), .class_init = e500_host_bridge_class_init, }; +static Property pcihost_properties[] = { + DEFINE_PROP_UINT32("first_slot", PPCE500PCIState, first_slot, 0x11), + DEFINE_PROP_END_OF_LIST(), +}; + static void e500_pcihost_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass); k->init = e500_pcihost_initfn; + dc->props = pcihost_properties; dc->vmsd = &vmstate_ppce500_pci; } diff --git a/hw/ppce500_pci.h b/hw/ppce500_pci.h new file mode 100644 index 0000000000..61f773ef30 --- /dev/null +++ b/hw/ppce500_pci.h @@ -0,0 +1,9 @@ +#ifndef PPCE500_PCI_H +#define PPCE500_PCI_H + +static inline int ppce500_pci_map_irq_slot(int devno, int irq_num) +{ + return (devno + irq_num) % 4; +} + +#endif @@ -122,7 +122,6 @@ static QEMUMachine puv3_machine = { .desc = "PKUnity Version-3 based on UniCore32", .init = puv3_init, .is_default = 1, - .use_scsi = 0, }; static void puv3_machine_init(void) diff --git a/hw/realview.c b/hw/realview.c index 149bb562af..5fbdcbf2b0 100644 --- a/hw/realview.c +++ b/hw/realview.c @@ -364,14 +364,14 @@ static QEMUMachine realview_eb_machine = { .name = "realview-eb", .desc = "ARM RealView Emulation Baseboard (ARM926EJ-S)", .init = realview_eb_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine realview_eb_mpcore_machine = { .name = "realview-eb-mpcore", .desc = "ARM RealView Emulation Baseboard (ARM11MPCore)", .init = realview_eb_mpcore_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -385,7 +385,7 @@ static QEMUMachine realview_pbx_a9_machine = { .name = "realview-pbx-a9", .desc = "ARM RealView Platform Baseboard Explore for Cortex-A9", .init = realview_pbx_a9_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c index ca1bb09816..7aca0c4aad 100644 --- a/hw/s390-virtio.c +++ b/hw/s390-virtio.c @@ -314,21 +314,6 @@ static void s390_init(QEMUMachineInitArgs *args) qdev_set_nic_properties(dev, nd); qdev_init_nofail(dev); } - - /* Create VirtIO disk drives */ - for(i = 0; i < MAX_BLK_DEVS; i++) { - DriveInfo *dinfo; - DeviceState *dev; - - dinfo = drive_get(IF_IDE, 0, i); - if (!dinfo) { - continue; - } - - dev = qdev_create((BusState *)s390_bus, "virtio-blk-s390"); - qdev_prop_set_drive_nofail(dev, "drive", dinfo->bdrv); - qdev_init_nofail(dev); - } } static QEMUMachine s390_machine = { @@ -336,6 +321,7 @@ static QEMUMachine s390_machine = { .alias = "s390", .desc = "VirtIO based S390 machine", .init = s390_init, + .block_default_type = IF_VIRTIO, .no_cdrom = 1, .no_floppy = 1, .no_serial = 1, @@ -352,3 +338,4 @@ static void s390_machine_init(void) } machine_init(s390_machine_init); + diff --git a/hw/spapr.c b/hw/spapr.c index b0125a892c..9bd2fd5c8c 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -657,6 +657,36 @@ static void spapr_cpu_reset(void *opaque) (spapr->htab_shift - 18); } +static void spapr_create_nvram(sPAPREnvironment *spapr) +{ + QemuOpts *machine_opts; + DeviceState *dev; + + dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram"); + + machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0); + if (machine_opts) { + const char *drivename; + + drivename = qemu_opt_get(machine_opts, "nvram"); + if (drivename) { + BlockDriverState *bs; + + bs = bdrv_find(drivename); + if (!bs) { + fprintf(stderr, "No such block device \"%s\" for nvram\n", + drivename); + exit(1); + } + qdev_prop_set_drive_nofail(dev, "drive", bs); + } + } + + qdev_init_nofail(dev); + + spapr->nvram = (struct sPAPRNVRAM *)dev; +} + /* Returns whether we want to use VGA or not */ static int spapr_vga_init(PCIBus *pci_bus) { @@ -801,7 +831,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) /* Set up Interrupt Controller */ spapr->icp = xics_system_init(XICS_IRQS); - spapr->next_irq = 16; + spapr->next_irq = XICS_IRQ_BASE; /* Set up EPOW events infrastructure */ spapr_events_init(spapr); @@ -818,6 +848,9 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) } } + /* We always have at least the nvram device on VIO */ + spapr_create_nvram(spapr); + /* Set up PCI */ spapr_pci_rtas_init(); @@ -924,9 +957,9 @@ static QEMUMachine spapr_machine = { .desc = "pSeries Logical Partition (PAPR compliant)", .init = ppc_spapr_init, .reset = ppc_spapr_reset, + .block_default_type = IF_SCSI, .max_cpus = MAX_CPUS, .no_parallel = 1, - .use_scsi = 1, }; static void spapr_machine_init(void) diff --git a/hw/spapr.h b/hw/spapr.h index efe7f5758f..600722f132 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -6,11 +6,13 @@ struct VIOsPAPRBus; struct sPAPRPHBState; +struct sPAPRNVRAM; struct icp_state; typedef struct sPAPREnvironment { struct VIOsPAPRBus *vio_bus; QLIST_HEAD(, sPAPRPHBState) phbs; + struct sPAPRNVRAM *nvram; struct icp_state *icp; hwaddr ram_limit; @@ -320,7 +322,7 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val) typedef void (*spapr_rtas_fn)(sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -void spapr_rtas_register(const char *name, spapr_rtas_fn fn); +int spapr_rtas_register(const char *name, spapr_rtas_fn fn); target_ulong spapr_rtas_call(sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c index 02d78ccf28..3011b251d3 100644 --- a/hw/spapr_iommu.c +++ b/hw/spapr_iommu.c @@ -120,6 +120,12 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size) { sPAPRTCETable *tcet; + if (spapr_tce_find_by_liobn(liobn)) { + fprintf(stderr, "Attempted to create TCE table with duplicate" + " LIOBN 0x%x\n", liobn); + return NULL; + } + if (!window_size) { return NULL; } diff --git a/hw/spapr_nvram.c b/hw/spapr_nvram.c new file mode 100644 index 0000000000..512bb8d5d1 --- /dev/null +++ b/hw/spapr_nvram.c @@ -0,0 +1,196 @@ +/* + * QEMU sPAPR NVRAM emulation + * + * Copyright (C) 2012 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <sys/mman.h> +#include <libfdt.h> + +#include "device_tree.h" +#include "hw/sysbus.h" +#include "hw/spapr.h" +#include "hw/spapr_vio.h" + +typedef struct sPAPRNVRAM { + VIOsPAPRDevice sdev; + uint32_t size; + uint8_t *buf; + BlockDriverState *drive; +} sPAPRNVRAM; + +#define MIN_NVRAM_SIZE 8192 +#define DEFAULT_NVRAM_SIZE 65536 +#define MAX_NVRAM_SIZE (UINT16_MAX * 16) + +static void rtas_nvram_fetch(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRNVRAM *nvram = spapr->nvram; + hwaddr offset, buffer, len; + int alen; + void *membuf; + + if ((nargs != 3) || (nret != 2)) { + rtas_st(rets, 0, -3); + return; + } + + if (!nvram) { + rtas_st(rets, 0, -1); + rtas_st(rets, 1, 0); + return; + } + + offset = rtas_ld(args, 0); + buffer = rtas_ld(args, 1); + len = rtas_ld(args, 2); + + if (((offset + len) < offset) + || ((offset + len) > nvram->size)) { + rtas_st(rets, 0, -3); + rtas_st(rets, 1, 0); + return; + } + + membuf = cpu_physical_memory_map(buffer, &len, 1); + if (nvram->drive) { + alen = bdrv_pread(nvram->drive, offset, membuf, len); + } else { + assert(nvram->buf); + + memcpy(membuf, nvram->buf + offset, len); + alen = len; + } + cpu_physical_memory_unmap(membuf, len, 1, len); + + rtas_st(rets, 0, (alen < len) ? -1 : 0); + rtas_st(rets, 1, (alen < 0) ? 0 : alen); +} + +static void rtas_nvram_store(sPAPREnvironment *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + sPAPRNVRAM *nvram = spapr->nvram; + hwaddr offset, buffer, len; + int alen; + void *membuf; + + if ((nargs != 3) || (nret != 2)) { + rtas_st(rets, 0, -3); + return; + } + + if (!nvram) { + rtas_st(rets, 0, -1); + return; + } + + offset = rtas_ld(args, 0); + buffer = rtas_ld(args, 1); + len = rtas_ld(args, 2); + + if (((offset + len) < offset) + || ((offset + len) > nvram->size)) { + rtas_st(rets, 0, -3); + return; + } + + membuf = cpu_physical_memory_map(buffer, &len, 0); + if (nvram->drive) { + alen = bdrv_pwrite(nvram->drive, offset, membuf, len); + } else { + assert(nvram->buf); + + memcpy(nvram->buf + offset, membuf, len); + alen = len; + } + cpu_physical_memory_unmap(membuf, len, 0, len); + + rtas_st(rets, 0, (alen < len) ? -1 : 0); + rtas_st(rets, 1, (alen < 0) ? 0 : alen); +} + +static int spapr_nvram_init(VIOsPAPRDevice *dev) +{ + sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev; + + if (nvram->drive) { + nvram->size = bdrv_getlength(nvram->drive); + } else { + nvram->size = DEFAULT_NVRAM_SIZE; + nvram->buf = g_malloc0(nvram->size); + } + + if ((nvram->size < MIN_NVRAM_SIZE) || (nvram->size > MAX_NVRAM_SIZE)) { + fprintf(stderr, "spapr-nvram must be between %d and %d bytes in size\n", + MIN_NVRAM_SIZE, MAX_NVRAM_SIZE); + return -1; + } + + spapr_rtas_register("nvram-fetch", rtas_nvram_fetch); + spapr_rtas_register("nvram-store", rtas_nvram_store); + + return 0; +} + +static int spapr_nvram_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off) +{ + sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev; + + return fdt_setprop_cell(fdt, node_off, "#bytes", nvram->size); +} + +static Property spapr_nvram_properties[] = { + DEFINE_SPAPR_PROPERTIES(sPAPRNVRAM, sdev), + DEFINE_PROP_DRIVE("drive", sPAPRNVRAM, drive), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_nvram_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VIOsPAPRDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass); + + k->init = spapr_nvram_init; + k->devnode = spapr_nvram_devnode; + k->dt_name = "nvram"; + k->dt_type = "nvram"; + k->dt_compatible = "qemu,spapr-nvram"; + dc->props = spapr_nvram_properties; +} + +static const TypeInfo spapr_nvram_type_info = { + .name = "spapr-nvram", + .parent = TYPE_VIO_SPAPR_DEVICE, + .instance_size = sizeof(sPAPRNVRAM), + .class_init = spapr_nvram_class_init, +}; + +static void spapr_nvram_register_types(void) +{ + type_register_static(&spapr_nvram_type_info); +} + +type_init(spapr_nvram_register_types) diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index 6d5c48a740..e618c2db53 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -242,7 +242,7 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr, return H_PARAMETER; } -void spapr_rtas_register(const char *name, spapr_rtas_fn fn) +int spapr_rtas_register(const char *name, spapr_rtas_fn fn) { int i; @@ -258,7 +258,7 @@ void spapr_rtas_register(const char *name, spapr_rtas_fn fn) rtas_next->name = name; rtas_next->fn = fn; - rtas_next++; + return (rtas_next++ - rtas_table) + TOKEN_BASE; } int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, @@ -301,7 +301,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr, for (i = 0; i < TOKEN_MAX; i++) { struct rtas_call *call = &rtas_table[i]; - if (!call->fn) { + if (!call->name) { continue; } diff --git a/hw/sun4m.c b/hw/sun4m.c index 1a786762aa..52cf82b681 100644 --- a/hw/sun4m.c +++ b/hw/sun4m.c @@ -1426,7 +1426,7 @@ static QEMUMachine ss5_machine = { .name = "SS-5", .desc = "Sun4m platform, SPARCstation 5", .init = ss5_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .is_default = 1, }; @@ -1434,7 +1434,7 @@ static QEMUMachine ss10_machine = { .name = "SS-10", .desc = "Sun4m platform, SPARCstation 10", .init = ss10_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1442,7 +1442,7 @@ static QEMUMachine ss600mp_machine = { .name = "SS-600MP", .desc = "Sun4m platform, SPARCserver 600MP", .init = ss600mp_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1450,7 +1450,7 @@ static QEMUMachine ss20_machine = { .name = "SS-20", .desc = "Sun4m platform, SPARCstation 20", .init = ss20_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -1458,35 +1458,35 @@ static QEMUMachine voyager_machine = { .name = "Voyager", .desc = "Sun4m platform, SPARCstation Voyager", .init = vger_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine ss_lx_machine = { .name = "LX", .desc = "Sun4m platform, SPARCstation LX", .init = ss_lx_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine ss4_machine = { .name = "SS-4", .desc = "Sun4m platform, SPARCstation 4", .init = ss4_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine scls_machine = { .name = "SPARCClassic", .desc = "Sun4m platform, SPARCClassic", .init = scls_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine sbook_machine = { .name = "SPARCbook", .desc = "Sun4m platform, SPARCbook", .init = sbook_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static const struct sun4d_hwdef sun4d_hwdefs[] = { @@ -1709,7 +1709,7 @@ static QEMUMachine ss1000_machine = { .name = "SS-1000", .desc = "Sun4d platform, SPARCserver 1000", .init = ss1000_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 8, }; @@ -1717,7 +1717,7 @@ static QEMUMachine ss2000_machine = { .name = "SS-2000", .desc = "Sun4d platform, SPARCcenter 2000", .init = ss2000_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 20, }; @@ -1896,7 +1896,7 @@ static QEMUMachine ss2_machine = { .name = "SS-2", .desc = "Sun4c platform, SPARCstation 2", .init = ss2_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void sun4m_register_types(void) diff --git a/hw/versatilepb.c b/hw/versatilepb.c index 41e39d8fb9..f5a742b37f 100644 --- a/hw/versatilepb.c +++ b/hw/versatilepb.c @@ -358,14 +358,14 @@ static QEMUMachine versatilepb_machine = { .name = "versatilepb", .desc = "ARM Versatile/PB (ARM926EJ-S)", .init = vpb_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static QEMUMachine versatileab_machine = { .name = "versatileab", .desc = "ARM Versatile/AB (ARM926EJ-S)", .init = vab_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, }; static void versatile_machine_init(void) diff --git a/hw/vexpress.c b/hw/vexpress.c index d93f057bff..e89694c86e 100644 --- a/hw/vexpress.c +++ b/hw/vexpress.c @@ -477,7 +477,7 @@ static QEMUMachine vexpress_a9_machine = { .name = "vexpress-a9", .desc = "ARM Versatile Express for Cortex-A9", .init = vexpress_a9_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; @@ -485,7 +485,7 @@ static QEMUMachine vexpress_a15_machine = { .name = "vexpress-a15", .desc = "ARM Versatile Express for Cortex-A15", .init = vexpress_a15_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 4, }; diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c index 45d90ab490..264e58a68b 100644 --- a/hw/vfio_pci.c +++ b/hw/vfio_pci.c @@ -275,7 +275,7 @@ static void vfio_enable_intx_kvm(VFIODevice *vdev) int ret, argsz; int32_t *pfd; - if (!kvm_irqchip_in_kernel() || + if (!kvm_irqfds_enabled() || vdev->intx.route.mode != PCI_INTX_ENABLED || !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { return; @@ -438,7 +438,8 @@ static int vfio_enable_intx(VFIODevice *vdev) * Only conditional to avoid generating error messages on platforms * where we won't actually use the result anyway. */ - if (kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { + if (kvm_irqfds_enabled() && + kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) { vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin); } @@ -2413,7 +2413,7 @@ void ppm_save(const char *filename, struct DisplaySurface *ds, Error **errp) } linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); for (y = 0; y < height; y++) { - qemu_pixman_linebuf_fill(linebuf, ds->image, width, y); + qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y); clearerr(f); ret = fwrite(pixman_image_get_data(linebuf), 1, pixman_image_get_stride(linebuf), f); diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h index f0740d01af..651a000b9f 100644 --- a/hw/virtio-blk.h +++ b/hw/virtio-blk.h @@ -104,7 +104,6 @@ struct VirtIOBlkConf BlockConf conf; char *serial; uint32_t scsi; - uint32_t config_wce; }; #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \ diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index e9b722dd96..d0d6a5e816 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -895,7 +895,6 @@ static Property virtio_blk_properties[] = { #ifdef __linux__ DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true), #endif - DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true), DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features), diff --git a/hw/watchdog.c b/hw/watchdog.c index b52acedd98..5c82c17d09 100644 --- a/hw/watchdog.c +++ b/hw/watchdog.c @@ -66,7 +66,7 @@ int select_watchdog(const char *p) QLIST_FOREACH(model, &watchdog_list, entry) { if (strcasecmp(model->wdt_name, p) == 0) { /* add the device */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(opts, "driver", p); return 0; } @@ -26,6 +26,7 @@ */ #include "hw.h" +#include "trace.h" #include "hw/spapr.h" #include "hw/xics.h" @@ -66,6 +67,8 @@ static void icp_check_ipi(struct icp_state *icp, int server) return; } + trace_xics_icp_check_ipi(server, ss->mfrr); + if (XISR(ss)) { ics_reject(icp->ics, XISR(ss)); } @@ -120,11 +123,13 @@ static void icp_set_mfrr(struct icp_state *icp, int server, uint8_t mfrr) static uint32_t icp_accept(struct icp_server_state *ss) { - uint32_t xirr; + uint32_t xirr = ss->xirr; qemu_irq_lower(ss->output); - xirr = ss->xirr; ss->xirr = ss->pending_priority << 24; + + trace_xics_icp_accept(xirr, ss->xirr); + return xirr; } @@ -134,6 +139,7 @@ static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr) /* Send EOI -> ICS */ ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK); + trace_xics_icp_eoi(server, xirr, ss->xirr); ics_eoi(icp->ics, xirr & XISR_MASK); if (!XISR(ss)) { icp_resend(icp, server); @@ -144,6 +150,8 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority) { struct icp_server_state *ss = icp->ss + server; + trace_xics_icp_irq(server, nr, priority); + if ((priority >= CPPR(ss)) || (XISR(ss) && (ss->pending_priority <= priority))) { ics_reject(icp->ics, nr); @@ -153,6 +161,7 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority) } ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK); ss->pending_priority = priority; + trace_xics_icp_raise(ss->xirr, ss->pending_priority); qemu_irq_raise(ss->output); } } @@ -170,13 +179,13 @@ struct ics_irq_state { #define XICS_STATUS_REJECTED 0x4 #define XICS_STATUS_MASKED_PENDING 0x8 uint8_t status; - bool lsi; }; struct ics_state { int nr_irqs; int offset; qemu_irq *qirqs; + bool *islsi; struct ics_irq_state *irqs; struct icp_state *icp; }; @@ -217,10 +226,12 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val) { struct ics_irq_state *irq = ics->irqs + srcno; + trace_xics_set_irq_msi(srcno, srcno + ics->offset); + if (val) { if (irq->priority == 0xff) { irq->status |= XICS_STATUS_MASKED_PENDING; - /* masked pending */ ; + trace_xics_masked_pending(); } else { icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority); } @@ -231,6 +242,7 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val) { struct ics_irq_state *irq = ics->irqs + srcno; + trace_xics_set_irq_lsi(srcno, srcno + ics->offset); if (val) { irq->status |= XICS_STATUS_ASSERTED; } else { @@ -242,9 +254,8 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val) static void ics_set_irq(void *opaque, int srcno, int val) { struct ics_state *ics = (struct ics_state *)opaque; - struct ics_irq_state *irq = ics->irqs + srcno; - if (irq->lsi) { + if (ics->islsi[srcno]) { set_irq_lsi(ics, srcno, val); } else { set_irq_msi(ics, srcno, val); @@ -279,7 +290,9 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server, irq->priority = priority; irq->saved_priority = saved_priority; - if (irq->lsi) { + trace_xics_ics_write_xive(nr, srcno, server, priority); + + if (ics->islsi[srcno]) { write_xive_lsi(ics, srcno); } else { write_xive_msi(ics, srcno); @@ -290,6 +303,7 @@ static void ics_reject(struct ics_state *ics, int nr) { struct ics_irq_state *irq = ics->irqs + nr - ics->offset; + trace_xics_ics_reject(nr, nr - ics->offset); irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */ irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */ } @@ -299,10 +313,8 @@ static void ics_resend(struct ics_state *ics) int i; for (i = 0; i < ics->nr_irqs; i++) { - struct ics_irq_state *irq = ics->irqs + i; - /* FIXME: filter by server#? */ - if (irq->lsi) { + if (ics->islsi[i]) { resend_lsi(ics, i); } else { resend_msi(ics, i); @@ -315,7 +327,9 @@ static void ics_eoi(struct ics_state *ics, int nr) int srcno = nr - ics->offset; struct ics_irq_state *irq = ics->irqs + srcno; - if (irq->lsi) { + trace_xics_ics_eoi(nr); + + if (ics->islsi[srcno]) { irq->status &= ~XICS_STATUS_SENT; } } @@ -337,7 +351,7 @@ void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi) { assert(ics_valid_irq(icp->ics, irq)); - icp->ics->irqs[irq - icp->ics->offset].lsi = lsi; + icp->ics->islsi[irq - icp->ics->offset] = lsi; } static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr, @@ -495,16 +509,14 @@ static void xics_reset(void *opaque) for (i = 0; i < icp->nr_servers; i++) { icp->ss[i].xirr = 0; - icp->ss[i].pending_priority = 0; + icp->ss[i].pending_priority = 0xff; icp->ss[i].mfrr = 0xff; /* Make all outputs are deasserted */ qemu_set_irq(icp->ss[i].output, 0); } + memset(ics->irqs, 0, sizeof(struct ics_irq_state) * ics->nr_irqs); for (i = 0; i < ics->nr_irqs; i++) { - /* Reset everything *except* the type */ - ics->irqs[i].server = 0; - ics->irqs[i].status = 0; ics->irqs[i].priority = 0xff; ics->irqs[i].saved_priority = 0xff; } @@ -549,8 +561,9 @@ struct icp_state *xics_system_init(int nr_irqs) ics = g_malloc0(sizeof(*ics)); ics->nr_irqs = nr_irqs; - ics->offset = 16; + ics->offset = XICS_IRQ_BASE; ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state)); + ics->islsi = g_malloc0(nr_irqs * sizeof(bool)); icp->ics = ics; ics->icp = icp; @@ -28,6 +28,7 @@ #define __XICS_H__ #define XICS_IPI 0x2 +#define XICS_IRQ_BASE 0x10 struct icp_state; diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c index 1f12a3d1ad..9ca22a4e7d 100644 --- a/hw/xilinx_zynq.c +++ b/hw/xilinx_zynq.c @@ -57,6 +57,7 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq, DeviceState *dev; SysBusDevice *busdev; SSIBus *spi; + DeviceState *flash_dev; int i, j; int num_busses = is_qspi ? NUM_QSPI_BUSSES : 1; int num_ss = is_qspi ? NUM_QSPI_FLASHES : NUM_SPI_FLASHES; @@ -81,11 +82,11 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq, spi = (SSIBus *)qdev_get_child_bus(dev, bus_name); for (j = 0; j < num_ss; ++j) { - dev = ssi_create_slave_no_init(spi, "m25p80"); - qdev_prop_set_string(dev, "partname", "n25q128"); - qdev_init_nofail(dev); + flash_dev = ssi_create_slave_no_init(spi, "m25p80"); + qdev_prop_set_string(flash_dev, "partname", "n25q128"); + qdev_init_nofail(flash_dev); - cs_line = qdev_get_gpio_in(dev, 0); + cs_line = qdev_get_gpio_in(flash_dev, 0); sysbus_connect_irq(busdev, i * num_ss + j + 1, cs_line); } } @@ -200,7 +201,7 @@ static QEMUMachine zynq_machine = { .name = "xilinx-zynq-a9", .desc = "Xilinx Zynq Platform Baseboard for Cortex-A9", .init = zynq_init, - .use_scsi = 1, + .block_default_type = IF_SCSI, .max_cpus = 1, .no_sdcard = 1 }; diff --git a/main-loop.c b/main-loop.c index c87624e621..7dba6f6e35 100644 --- a/main-loop.c +++ b/main-loop.c @@ -432,11 +432,6 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) return aio_bh_new(qemu_aio_context, cb, opaque); } -void qemu_aio_flush(void) -{ - aio_flush(qemu_aio_context); -} - bool qemu_aio_wait(void) { return aio_poll(qemu_aio_context, true); diff --git a/pc-bios/README b/pc-bios/README index 303713099e..eff3de7615 100644 --- a/pc-bios/README +++ b/pc-bios/README @@ -17,7 +17,7 @@ - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware implementation for certain IBM POWER hardware. The sources are at https://github.com/dgibson/SLOF, and the image currently in qemu is - built from git tag qemu-slof-20120731. + built from git tag qemu-slof-20121018. - sgabios (the Serial Graphics Adapter option ROM) provides a means for legacy x86 software to communicate with an attached serial console as diff --git a/pc-bios/acpi-dsdt.aml b/pc-bios/acpi-dsdt.aml Binary files differindex bb3dd83a56..18b4dc1aa5 100644 --- a/pc-bios/acpi-dsdt.aml +++ b/pc-bios/acpi-dsdt.aml diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin Binary files differindex dc9b57ddc9..3eefff4cf8 100644 --- a/pc-bios/bios.bin +++ b/pc-bios/bios.bin diff --git a/pc-bios/q35-acpi-dsdt.aml b/pc-bios/q35-acpi-dsdt.aml Binary files differnew file mode 100644 index 0000000000..8a50559514 --- /dev/null +++ b/pc-bios/q35-acpi-dsdt.aml diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin Binary files differindex 84ba6b83f3..3410f4fff4 100644 --- a/pc-bios/slof.bin +++ b/pc-bios/slof.bin diff --git a/pixman b/pixman -Subproject 97336fad32acf802003855cd8bd6477fa49a12e +Subproject a5e5179b5624c99c812e9bf6e7b907e355a811e diff --git a/qemu-aio.h b/qemu-aio.h index 3889fe97a4..31884a8f16 100644 --- a/qemu-aio.h +++ b/qemu-aio.h @@ -162,10 +162,6 @@ void qemu_bh_cancel(QEMUBH *bh); */ void qemu_bh_delete(QEMUBH *bh); -/* Flush any pending AIO operation. This function will block until all - * outstanding AIO operations have been completed or cancelled. */ -void aio_flush(AioContext *ctx); - /* Return whether there are any pending callbacks from the GSource * attached to the AioContext. * @@ -196,7 +192,7 @@ typedef int (AioFlushHandler)(void *opaque); /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will - * be invoked when using either qemu_aio_wait() or qemu_aio_flush(). + * be invoked when using qemu_aio_wait(). * * Code that invokes AIO completion functions should rely on this function * instead of qemu_set_fd_handler[2]. @@ -211,7 +207,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Register an event notifier and associated callbacks. Behaves very similarly * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks - * will be invoked when using either qemu_aio_wait() or qemu_aio_flush(). + * will be invoked when using qemu_aio_wait(). * * Code that invokes AIO completion functions should rely on this function * instead of event_notifier_set_handler. @@ -228,7 +224,6 @@ GSource *aio_get_g_source(AioContext *ctx); /* Functions to operate on the main QEMU AioContext. */ -void qemu_aio_flush(void); bool qemu_aio_wait(void); void qemu_aio_set_event_notifier(EventNotifier *notifier, EventNotifierHandler *io_read, diff --git a/qemu-config.c b/qemu-config.c index aa78fb9ea7..b4ce0d8034 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -579,6 +579,10 @@ static QemuOptsList qemu_machine_opts = { .name = "usb", .type = QEMU_OPT_BOOL, .help = "Set on/off to enable/disable usb", + }, { + .name = "nvram", + .type = QEMU_OPT_STRING, + .help = "Drive backing persistent NVRAM", }, { /* End of list */ } }, @@ -756,7 +760,7 @@ int qemu_global_option(const char *str) return -1; } - opts = qemu_opts_create(&qemu_global_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&qemu_global_opts); qemu_opt_set(opts, "driver", driver); qemu_opt_set(opts, "property", property); qemu_opt_set(opts, "value", str+offset+1); @@ -843,7 +847,7 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname) error_free(local_err); goto out; } - opts = qemu_opts_create(list, NULL, 0, NULL); + opts = qemu_opts_create_nofail(list); continue; } if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2) { diff --git a/qemu-img.c b/qemu-img.c index e29e01b729..c989a52564 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -294,13 +294,14 @@ static int add_old_style_options(const char *fmt, QEMUOptionParameter *list, static int img_create(int argc, char **argv) { - int c, ret = 0; + int c; uint64_t img_size = -1; const char *fmt = "raw"; const char *base_fmt = NULL; const char *filename; const char *base_filename = NULL; char *options = NULL; + Error *local_err = NULL; for(;;) { c = getopt(argc, argv, "F:b:f:he6o:"); @@ -350,23 +351,23 @@ static int img_create(int argc, char **argv) error_report("Invalid image size specified! You may use k, M, G or " "T suffixes for "); error_report("kilobytes, megabytes, gigabytes and terabytes."); - ret = -1; - goto out; + return 1; } img_size = (uint64_t)sval; } if (options && is_help_option(options)) { - ret = print_block_option_help(filename, fmt); - goto out; + return print_block_option_help(filename, fmt); } - ret = bdrv_img_create(filename, fmt, base_filename, base_fmt, - options, img_size, BDRV_O_FLAGS); -out: - if (ret) { + bdrv_img_create(filename, fmt, base_filename, base_fmt, + options, img_size, BDRV_O_FLAGS, &local_err); + if (error_is_set(&local_err)) { + error_report("%s", error_get_pretty(local_err)); + error_free(local_err); return 1; } + return 0; } @@ -1933,7 +1934,7 @@ static int img_resize(int argc, char **argv) } /* Parse size */ - param = qemu_opts_create(&resize_options, NULL, 0, NULL); + param = qemu_opts_create_nofail(&resize_options); if (qemu_opt_set(param, BLOCK_OPT_SIZE, size)) { /* Error message already printed when size parsing fails */ ret = -1; @@ -265,6 +265,18 @@ static int do_co_write_zeroes(int64_t offset, int count, int *total) } } +static int do_write_compressed(char *buf, int64_t offset, int count, int *total) +{ + int ret; + + ret = bdrv_write_compressed(bs, offset >> 9, (uint8_t *)buf, count >> 9); + if (ret < 0) { + return ret; + } + *total = count; + return 1; +} + static int do_load_vmstate(char *buf, int64_t offset, int count, int *total) { *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count); @@ -687,6 +699,7 @@ static void write_help(void) " Writes into a segment of the currently open file, using a buffer\n" " filled with a set pattern (0xcdcdcdcd).\n" " -b, -- write to the VM state rather than the virtual disk\n" +" -c, -- write compressed data with bdrv_write_compressed\n" " -p, -- use bdrv_pwrite to write the file\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" @@ -703,7 +716,7 @@ static const cmdinfo_t write_cmd = { .cfunc = write_f, .argmin = 2, .argmax = -1, - .args = "[-bCpqz] [-P pattern ] off len", + .args = "[-bcCpqz] [-P pattern ] off len", .oneline = "writes a number of bytes at a specified offset", .help = write_help, }; @@ -712,6 +725,7 @@ static int write_f(int argc, char **argv) { struct timeval t1, t2; int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0; + int cflag = 0; int c, cnt; char *buf = NULL; int64_t offset; @@ -720,11 +734,14 @@ static int write_f(int argc, char **argv) int total = 0; int pattern = 0xcd; - while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) { + while ((c = getopt(argc, argv, "bcCpP:qz")) != EOF) { switch (c) { case 'b': bflag = 1; break; + case 'c': + cflag = 1; + break; case 'C': Cflag = 1; break; @@ -801,6 +818,8 @@ static int write_f(int argc, char **argv) cnt = do_save_vmstate(buf, offset, count, &total); } else if (zflag) { cnt = do_co_write_zeroes(offset, count, &total); + } else if (cflag) { + cnt = do_write_compressed(buf, offset, count, &total); } else { cnt = do_write(buf, offset, count, &total); } @@ -1652,6 +1671,67 @@ static const cmdinfo_t map_cmd = { .oneline = "prints the allocated areas of a file", }; +static int break_f(int argc, char **argv) +{ + int ret; + + ret = bdrv_debug_breakpoint(bs, argv[1], argv[2]); + if (ret < 0) { + printf("Could not set breakpoint: %s\n", strerror(-ret)); + } + + return 0; +} + +static const cmdinfo_t break_cmd = { + .name = "break", + .argmin = 2, + .argmax = 2, + .cfunc = break_f, + .args = "event tag", + .oneline = "sets a breakpoint on event and tags the stopped " + "request as tag", +}; + +static int resume_f(int argc, char **argv) +{ + int ret; + + ret = bdrv_debug_resume(bs, argv[1]); + if (ret < 0) { + printf("Could not resume request: %s\n", strerror(-ret)); + } + + return 0; +} + +static const cmdinfo_t resume_cmd = { + .name = "resume", + .argmin = 1, + .argmax = 1, + .cfunc = resume_f, + .args = "tag", + .oneline = "resumes the request tagged as tag", +}; + +static int wait_break_f(int argc, char **argv) +{ + while (!bdrv_debug_is_suspended(bs, argv[1])) { + qemu_aio_wait(); + } + + return 0; +} + +static const cmdinfo_t wait_break_cmd = { + .name = "wait_break", + .argmin = 1, + .argmax = 1, + .cfunc = wait_break_f, + .args = "tag", + .oneline = "waits for the suspension of a request", +}; + static int abort_f(int argc, char **argv) { abort(); @@ -1915,6 +1995,9 @@ int main(int argc, char **argv) add_command(&discard_cmd); add_command(&alloc_cmd); add_command(&map_cmd); + add_command(&break_cmd); + add_command(&resume_cmd); + add_command(&wait_break_cmd); add_command(&abort_cmd); add_args_command(init_args_command); diff --git a/qemu-option.c b/qemu-option.c index 27891e74e7..94557cfde7 100644 --- a/qemu-option.c +++ b/qemu-option.c @@ -602,26 +602,36 @@ static void qemu_opt_del(QemuOpt *opt) g_free(opt); } -static void opt_set(QemuOpts *opts, const char *name, const char *value, - bool prepend, Error **errp) +static bool opts_accepts_any(const QemuOpts *opts) +{ + return opts->list->desc[0].name == NULL; +} + +static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, + const char *name) { - QemuOpt *opt; - const QemuOptDesc *desc = opts->list->desc; - Error *local_err = NULL; int i; for (i = 0; desc[i].name != NULL; i++) { if (strcmp(desc[i].name, name) == 0) { - break; + return &desc[i]; } } - if (desc[i].name == NULL) { - if (i == 0) { - /* empty list -> allow any */; - } else { - error_set(errp, QERR_INVALID_PARAMETER, name); - return; - } + + return NULL; +} + +static void opt_set(QemuOpts *opts, const char *name, const char *value, + bool prepend, Error **errp) +{ + QemuOpt *opt; + const QemuOptDesc *desc; + Error *local_err = NULL; + + desc = find_desc_by_name(opts->list->desc, name); + if (!desc && !opts_accepts_any(opts)) { + error_set(errp, QERR_INVALID_PARAMETER, name); + return; } opt = g_malloc0(sizeof(*opt)); @@ -632,9 +642,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value, } else { QTAILQ_INSERT_TAIL(&opts->head, opt, next); } - if (desc[i].name != NULL) { - opt->desc = desc+i; - } + opt->desc = desc; if (value) { opt->str = g_strdup(value); } @@ -669,30 +677,43 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val) { QemuOpt *opt; const QemuOptDesc *desc = opts->list->desc; - int i; - for (i = 0; desc[i].name != NULL; i++) { - if (strcmp(desc[i].name, name) == 0) { - break; - } - } - if (desc[i].name == NULL) { - if (i == 0) { - /* empty list -> allow any */; - } else { - qerror_report(QERR_INVALID_PARAMETER, name); - return -1; - } + opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + qerror_report(QERR_INVALID_PARAMETER, name); + g_free(opt); + return -1; } - opt = g_malloc0(sizeof(*opt)); opt->name = g_strdup(name); opt->opts = opts; + opt->value.boolean = !!val; + opt->str = g_strdup(val ? "on" : "off"); QTAILQ_INSERT_TAIL(&opts->head, opt, next); - if (desc[i].name != NULL) { - opt->desc = desc+i; + + return 0; +} + +int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val) +{ + QemuOpt *opt; + const QemuOptDesc *desc = opts->list->desc; + + opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + qerror_report(QERR_INVALID_PARAMETER, name); + g_free(opt); + return -1; } - opt->value.boolean = !!val; + + opt->name = g_strdup(name); + opt->opts = opts; + opt->value.uint = val; + opt->str = g_strdup_printf("%" PRId64, val); + QTAILQ_INSERT_TAIL(&opts->head, opt, next); + return 0; } @@ -781,6 +802,15 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, return opts; } +QemuOpts *qemu_opts_create_nofail(QemuOptsList *list) +{ + QemuOpts *opts; + Error *errp = NULL; + opts = qemu_opts_create(list, NULL, 0, &errp); + assert_no_error(errp); + return opts; +} + void qemu_opts_reset(QemuOptsList *list) { QemuOpts *opts, *next_opts; @@ -1068,23 +1098,15 @@ void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp) QemuOpt *opt; Error *local_err = NULL; - assert(opts->list->desc[0].name == NULL); + assert(opts_accepts_any(opts)); QTAILQ_FOREACH(opt, &opts->head, next) { - int i; - - for (i = 0; desc[i].name != NULL; i++) { - if (strcmp(desc[i].name, opt->name) == 0) { - break; - } - } - if (desc[i].name == NULL) { + opt->desc = find_desc_by_name(desc, opt->name); + if (!opt->desc) { error_set(errp, QERR_INVALID_PARAMETER, opt->name); return; } - opt->desc = &desc[i]; - qemu_opt_parse(opt, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); diff --git a/qemu-option.h b/qemu-option.h index ca729862d5..002dd07ee5 100644 --- a/qemu-option.h +++ b/qemu-option.h @@ -126,6 +126,7 @@ int qemu_opt_set(QemuOpts *opts, const char *name, const char *value); void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value, Error **errp); int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val); +int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val); typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque); int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, int abort_on_failure); @@ -133,6 +134,7 @@ int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id); QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, int fail_if_exists, Error **errp); +QemuOpts *qemu_opts_create_nofail(QemuOptsList *list); void qemu_opts_reset(QemuOptsList *list); void qemu_opts_loc_restore(QemuOpts *opts); int qemu_opts_set(QemuOptsList *list, const char *id, diff --git a/qemu-pixman.c b/qemu-pixman.c index e46e1804f6..e7263fb2bf 100644 --- a/qemu-pixman.c +++ b/qemu-pixman.c @@ -21,7 +21,7 @@ int qemu_pixman_get_type(int rshift, int gshift, int bshift) if (rshift == 0) { type = PIXMAN_TYPE_ABGR; } else { -#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 21, 8) +#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 16, 0) type = PIXMAN_TYPE_BGRA; #endif } @@ -52,10 +52,10 @@ pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format, } void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb, - int width, int y) + int width, int x, int y) { pixman_image_composite(PIXMAN_OP_SRC, fb, NULL, linebuf, - 0, y, 0, 0, 0, 0, width, 1); + x, y, 0, 0, 0, 0, width, 1); } pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format, diff --git a/qemu-pixman.h b/qemu-pixman.h index bee55eb7da..3c05c83a7c 100644 --- a/qemu-pixman.h +++ b/qemu-pixman.h @@ -31,7 +31,7 @@ pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf); pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format, int width); void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb, - int width, int y); + int width, int x, int y); pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format, pixman_image_t *image); void qemu_pixman_image_unref(pixman_image_t *image); diff --git a/qemu-sockets.c b/qemu-sockets.c index d314cf1d1b..c52a40a411 100644 --- a/qemu-sockets.c +++ b/qemu-sockets.c @@ -579,7 +579,7 @@ int inet_listen(const char *str, char *ostr, int olen, addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_listen_opts(opts, port_offset, errp); @@ -618,7 +618,7 @@ int inet_connect(const char *str, Error **errp) addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_connect_opts(opts, errp, NULL, NULL); @@ -652,7 +652,7 @@ int inet_nonblocking_connect(const char *str, addr = inet_parse(str, errp); if (addr != NULL) { - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); inet_addr_to_opts(opts, addr); qapi_free_InetSocketAddress(addr); sock = inet_connect_opts(opts, errp, callback, opaque); @@ -795,7 +795,7 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp) char *path, *optstr; int sock, len; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); optstr = strchr(str, ','); if (optstr) { @@ -823,7 +823,7 @@ int unix_connect(const char *path, Error **errp) QemuOpts *opts; int sock; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); qemu_opt_set(opts, "path", path); sock = unix_connect_opts(opts, errp, NULL, NULL); qemu_opts_del(opts); @@ -840,7 +840,7 @@ int unix_nonblocking_connect(const char *path, g_assert(callback != NULL); - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); qemu_opt_set(opts, "path", path); sock = unix_connect_opts(opts, errp, callback, opaque); qemu_opts_del(opts); @@ -891,7 +891,7 @@ int socket_connect(SocketAddress *addr, Error **errp, QemuOpts *opts; int fd; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); switch (addr->kind) { case SOCKET_ADDRESS_KIND_INET: inet_addr_to_opts(opts, addr->inet); @@ -922,7 +922,7 @@ int socket_listen(SocketAddress *addr, Error **errp) QemuOpts *opts; int fd; - opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL); + opts = qemu_opts_create_nofail(&dummy_opts); switch (addr->kind) { case SOCKET_ADDRESS_KIND_INET: inet_addr_to_opts(opts, addr->inet); diff --git a/roms/SLOF b/roms/SLOF -Subproject f21f7a3f46b557eb5923f899ce8b4401b3cc6d9 +Subproject 0ad10f26c94a86a0c9c3970e53f9a9f6a744055 diff --git a/roms/seabios b/roms/seabios -Subproject b1c35f2b28cc0c94ebed8176ff61ac0e0b37779 +Subproject e8a76b0f225bba5ba9d63ab227e0a37b3beb105 diff --git a/target-alpha/helper.c b/target-alpha/helper.c index d9d7f75b58..2430f70aca 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -494,16 +494,6 @@ void cpu_dump_state (CPUAlphaState *env, FILE *f, fprintf_function cpu_fprintf, cpu_fprintf(f, "\n"); } -void do_restore_state(CPUAlphaState *env, uintptr_t retaddr) -{ - if (retaddr) { - TranslationBlock *tb = tb_find_pc(retaddr); - if (tb) { - cpu_restore_state(tb, env, retaddr); - } - } -} - /* This should only be called from translate, via gen_excp. We expect that ENV->PC has already been updated. */ void QEMU_NORETURN helper_excp(CPUAlphaState *env, int excp, int error) @@ -519,7 +509,9 @@ void QEMU_NORETURN dynamic_excp(CPUAlphaState *env, uintptr_t retaddr, { env->exception_index = excp; env->error_code = error; - do_restore_state(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } cpu_loop_exit(env); } diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c index 617836cc6c..64b33f6518 100644 --- a/target-alpha/mem_helper.c +++ b/target-alpha/mem_helper.c @@ -94,7 +94,9 @@ static void do_unaligned_access(CPUAlphaState *env, target_ulong addr, uint64_t pc; uint32_t insn; - do_restore_state(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } pc = env->pc; insn = cpu_ldl_code(env, pc); @@ -143,7 +145,9 @@ void tlb_fill(CPUAlphaState *env, target_ulong addr, int is_write, ret = cpu_alpha_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret != 0)) { - do_restore_state(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } /* Exception index and error code are already set */ cpu_loop_exit(env); } diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 6e3ab90e3b..1fcc975945 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -74,19 +74,13 @@ uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def, void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } raise_exception(env, env->exception_index); } diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c index a7468d41c6..31db42494d 100644 --- a/target-cris/op_helper.c +++ b/target-cris/op_helper.c @@ -57,7 +57,6 @@ void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; D_LOG("%s pc=%x tpc=%x ra=%p\n", __func__, @@ -66,12 +65,7 @@ void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx, if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - + if (cpu_restore_state(env, retaddr)) { /* Evaluate flags after retranslation. */ helper_top_evaluate_flags(env); } diff --git a/target-i386/helper.c b/target-i386/helper.c index bf206cfa97..00341c5233 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -1196,15 +1196,12 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) { - TranslationBlock *tb; - if (kvm_enabled()) { env->tpr_access_type = access; cpu_interrupt(env, CPU_INTERRUPT_TPR); } else { - tb = tb_find_pc(env->mem_io_pc); - cpu_restore_state(tb, env, env->mem_io_pc); + cpu_restore_state(env, env->mem_io_pc); apic_handle_tpr_access_report(env->apic_state, env->eip, access); } diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c index 7f99c7cfe3..d0be77b1ed 100644 --- a/target-i386/mem_helper.c +++ b/target-i386/mem_helper.c @@ -135,19 +135,13 @@ void helper_boundl(CPUX86State *env, target_ulong a0, int v) void tlb_fill(CPUX86State *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx); if (ret) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } raise_exception_err(env, env->exception_index, env->error_code); } diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c index 7b91d8c31e..97b9625c1a 100644 --- a/target-lm32/op_helper.c +++ b/target-lm32/op_helper.c @@ -76,19 +76,13 @@ uint32_t helper_rcsr_jrx(CPULM32State *env) void tlb_fill(CPULM32State *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_lm32_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } cpu_loop_exit(env); } diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c index aa005048e1..b97ba5e28f 100644 --- a/target-m68k/op_helper.c +++ b/target-m68k/op_helper.c @@ -56,19 +56,13 @@ extern int semihosting_enabled; void tlb_fill(CPUM68KState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_m68k_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } cpu_loop_exit(env); } diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c index 210296b30b..7593517094 100644 --- a/target-microblaze/op_helper.c +++ b/target-microblaze/op_helper.c @@ -44,19 +44,13 @@ void tlb_fill(CPUMBState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_mb_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } cpu_loop_exit(env); } diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c index f45d494b14..2972ae3f0a 100644 --- a/target-mips/op_helper.c +++ b/target-mips/op_helper.c @@ -38,7 +38,6 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env, int error_code, uintptr_t pc) { - TranslationBlock *tb; #if 1 if (exception < 0x100) qemu_log("%s: %d %d\n", __func__, exception, error_code); @@ -48,12 +47,7 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env, if (pc) { /* now we have a real cpu fault */ - tb = tb_find_pc(pc); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, pc); - } + cpu_restore_state(env, pc); } cpu_loop_exit(env); diff --git a/target-openrisc/mmu_helper.c b/target-openrisc/mmu_helper.c index 59ed371ae0..d2edebcb49 100644 --- a/target-openrisc/mmu_helper.c +++ b/target-openrisc/mmu_helper.c @@ -39,8 +39,6 @@ void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; - unsigned long pc; int ret; ret = cpu_openrisc_handle_mmu_fault(env, addr, is_write, mmu_idx); @@ -48,13 +46,7 @@ void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write, if (ret) { if (retaddr) { /* now we have a real cpu fault. */ - pc = (unsigned long)retaddr; - tb = tb_find_pc(pc); - if (tb) { - /* the PC is inside the translated code. It means that we - have a virtual CPU fault. */ - cpu_restore_state(tb, env, pc); - } + cpu_restore_state(env, retaddr); } /* Raise Exception. */ cpu_loop_exit(env); diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 5f1dc8b7d5..742d4f8ae3 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -355,7 +355,7 @@ struct ppc6xx_tlb_t { typedef struct ppcemb_tlb_t ppcemb_tlb_t; struct ppcemb_tlb_t { - hwaddr RPN; + uint64_t RPN; target_ulong EPN; target_ulong PID; target_ulong size; diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c index 5b5f1bdd23..04c01445f9 100644 --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -275,19 +275,13 @@ STVE(stvewx, cpu_stl_data, bswap32, u32) void tlb_fill(CPUPPCState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_ppc_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret != 0)) { if (likely(retaddr)) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (likely(tb)) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } helper_raise_exception_err(env, env->exception_index, env->error_code); } diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c index 6ebc22dd11..91b25e309d 100644 --- a/target-s390x/mem_helper.c +++ b/target-s390x/mem_helper.c @@ -47,19 +47,13 @@ void tlb_fill(CPUS390XState *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; int ret; ret = cpu_s390x_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret != 0)) { if (likely(retaddr)) { /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (likely(tb)) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } + cpu_restore_state(env, retaddr); } cpu_loop_exit(env); } diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 60ec4cbc4d..e8e87f5152 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -21,21 +21,6 @@ #include "cpu.h" #include "helper.h" -static inline void cpu_restore_state_from_retaddr(CPUSH4State *env, - uintptr_t retaddr) -{ - TranslationBlock *tb; - - if (retaddr) { - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } - } -} - #ifndef CONFIG_USER_ONLY #include "softmmu_exec.h" @@ -61,7 +46,9 @@ void tlb_fill(CPUSH4State *env, target_ulong addr, int is_write, int mmu_idx, ret = cpu_sh4_handle_mmu_fault(env, addr, is_write, mmu_idx); if (ret) { /* now we have a real cpu fault */ - cpu_restore_state_from_retaddr(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } cpu_loop_exit(env); } } @@ -82,7 +69,9 @@ static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index, uintptr_t retaddr) { env->exception_index = index; - cpu_restore_state_from_retaddr(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } cpu_loop_exit(env); } diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h index 375f20a71e..013ecbd063 100644 --- a/target-sparc/cpu.h +++ b/target-sparc/cpu.h @@ -710,7 +710,6 @@ uint64_t cpu_tick_get_count(CPUTimer *timer); void cpu_tick_set_limit(CPUTimer *timer, uint64_t limit); trap_state* cpu_tsptr(CPUSPARCState* env); #endif -void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr); #define TB_FLAG_FPU_ENABLED (1 << 4) #define TB_FLAG_AM_ENABLED (1 << 5) diff --git a/target-sparc/helper.c b/target-sparc/helper.c index 556ac286eb..3c8e865eef 100644 --- a/target-sparc/helper.c +++ b/target-sparc/helper.c @@ -75,7 +75,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a, x1 = (b & 0xffffffff); if (x1 == 0) { - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_DIV_ZERO); } @@ -114,7 +114,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a, x1 = (b & 0xffffffff); if (x1 == 0) { - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_DIV_ZERO); } @@ -147,7 +147,7 @@ int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b) { if (b == 0) { /* Raise divide by zero trap. */ - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_DIV_ZERO); } else if (b == -1) { /* Avoid overflow trap with i386 divide insn. */ @@ -161,7 +161,7 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b) { if (b == 0) { /* Raise divide by zero trap. */ - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_DIV_ZERO); } return a / b; @@ -193,7 +193,7 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1, return dst; tag_overflow: - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_TOVF); } @@ -222,6 +222,6 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1, return dst; tag_overflow: - cpu_restore_state2(env, GETPC()); + cpu_restore_state(env, GETPC()); helper_raise_exception(env, TT_TOVF); } diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c index f3e08fd6e6..8d815e5038 100644 --- a/target-sparc/ldst_helper.c +++ b/target-sparc/ldst_helper.c @@ -2393,22 +2393,6 @@ void cpu_unassigned_access(CPUSPARCState *env, hwaddr addr, #endif #endif -/* XXX: make it generic ? */ -void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr) -{ - TranslationBlock *tb; - - if (retaddr) { - /* now we have a real cpu fault */ - tb = tb_find_pc(retaddr); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, env, retaddr); - } - } -} - #if !defined(CONFIG_USER_ONLY) static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env, target_ulong addr, int is_write, @@ -2418,7 +2402,9 @@ static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env, printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx "\n", addr, env->pc); #endif - cpu_restore_state2(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } helper_raise_exception(env, TT_UNALIGNED); } @@ -2433,7 +2419,9 @@ void tlb_fill(CPUSPARCState *env, target_ulong addr, int is_write, int mmu_idx, ret = cpu_sparc_handle_mmu_fault(env, addr, is_write, mmu_idx); if (ret) { - cpu_restore_state2(env, retaddr); + if (retaddr) { + cpu_restore_state(env, retaddr); + } cpu_loop_exit(env); } } diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c index f474d1b59b..b8172ba682 100644 --- a/target-unicore32/op_helper.c +++ b/target-unicore32/op_helper.c @@ -256,20 +256,13 @@ uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i) void tlb_fill(CPUUniCore32State *env, target_ulong addr, int is_write, int mmu_idx, uintptr_t retaddr) { - TranslationBlock *tb; - unsigned long pc; int ret; ret = uc32_cpu_handle_mmu_fault(env, addr, is_write, mmu_idx); if (unlikely(ret)) { if (retaddr) { /* now we have a real cpu fault */ - pc = (unsigned long)retaddr; - tb = tb_find_pc(pc); - if (tb) {/* the PC is inside the translated code. - It means that we have a virtual CPU fault */ - cpu_restore_state(tb, env, pc); - } + cpu_restore_state(env, retaddr); } cpu_loop_exit(env); } diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c index 200fb43c28..bf05575eb5 100644 --- a/target-xtensa/helper.c +++ b/target-xtensa/helper.c @@ -522,7 +522,8 @@ static int get_physical_addr_mmu(CPUXtensaState *env, bool update_tlb, INST_FETCH_PRIVILEGE_CAUSE; } - *access = mmu_attr_to_access(entry->attr); + *access = mmu_attr_to_access(entry->attr) & + ~(dtlb ? PAGE_EXEC : PAGE_READ | PAGE_WRITE); if (!is_access_granted(*access, is_write)) { return dtlb ? (is_write ? diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c index 0e0f21d1a2..84f0449f79 100644 --- a/target-xtensa/op_helper.c +++ b/target-xtensa/op_helper.c @@ -47,22 +47,12 @@ static void do_unaligned_access(CPUXtensaState *env, #define SHIFT 3 #include "softmmu_template.h" -static void do_restore_state(CPUXtensaState *env, uintptr_t pc) -{ - TranslationBlock *tb; - - tb = tb_find_pc(pc); - if (tb) { - cpu_restore_state(tb, env, pc); - } -} - static void do_unaligned_access(CPUXtensaState *env, target_ulong addr, int is_write, int is_user, uintptr_t retaddr) { if (xtensa_option_enabled(env->config, XTENSA_OPTION_UNALIGNED_EXCEPTION) && !xtensa_option_enabled(env->config, XTENSA_OPTION_HW_ALIGNMENT)) { - do_restore_state(env, retaddr); + cpu_restore_state(env, retaddr); HELPER(exception_cause_vaddr)(env, env->pc, LOAD_STORE_ALIGNMENT_CAUSE, addr); } @@ -86,7 +76,7 @@ void tlb_fill(CPUXtensaState *env, paddr & TARGET_PAGE_MASK, access, mmu_idx, page_size); } else { - do_restore_state(env, retaddr); + cpu_restore_state(env, retaddr); HELPER(exception_cause_vaddr)(env, env->pc, ret, vaddr); } } diff --git a/tests/qemu-iotests/045 b/tests/qemu-iotests/045 new file mode 100755 index 0000000000..2b6f1af27a --- /dev/null +++ b/tests/qemu-iotests/045 @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# +# Tests for fdsets. +# +# Copyright (C) 2012 IBM Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import os +import iotests +from iotests import qemu_img + +image0 = os.path.join(iotests.test_dir, 'image0') +image1 = os.path.join(iotests.test_dir, 'image1') +image2 = os.path.join(iotests.test_dir, 'image2') +image3 = os.path.join(iotests.test_dir, 'image3') +image4 = os.path.join(iotests.test_dir, 'image4') + +class TestFdSets(iotests.QMPTestCase): + + def setUp(self): + self.vm = iotests.VM() + qemu_img('create', '-f', iotests.imgfmt, image0, '128K') + qemu_img('create', '-f', iotests.imgfmt, image1, '128K') + qemu_img('create', '-f', iotests.imgfmt, image2, '128K') + qemu_img('create', '-f', iotests.imgfmt, image3, '128K') + qemu_img('create', '-f', iotests.imgfmt, image4, '128K') + self.file0 = open(image0, 'r') + self.file1 = open(image1, 'w+') + self.file2 = open(image2, 'r') + self.file3 = open(image3, 'r') + self.file4 = open(image4, 'r') + self.vm.add_fd(self.file0.fileno(), 1, 'image0:r') + self.vm.add_fd(self.file1.fileno(), 1, 'image1:w+') + self.vm.add_fd(self.file2.fileno(), 0, 'image2:r') + self.vm.add_fd(self.file3.fileno(), 2, 'image3:r') + self.vm.add_fd(self.file4.fileno(), 2, 'image4:r') + self.vm.add_drive("/dev/fdset/1") + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + self.file0.close() + self.file1.close() + self.file2.close() + self.file3.close() + self.file4.close() + os.remove(image0) + os.remove(image1) + os.remove(image2) + os.remove(image3) + os.remove(image4) + + def test_query_fdset(self): + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 2) + self.assert_qmp(result, 'return[1]/fdset-id', 1) + self.assert_qmp(result, 'return[2]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image3:r') + self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image4:r') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fdset(self): + result = self.vm.qmp('remove-fd', fdset_id=2) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 1) + self.assert_qmp(result, 'return[1]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fd(self): + result = self.vm.qmp('query-fdsets') + fd_image3 = result['return'][0]['fds'][0]['fd'] + result = self.vm.qmp('remove-fd', fdset_id=2, fd=fd_image3) + self.assert_qmp(result, 'return', {}) + result = self.vm.qmp('query-fdsets') + self.assert_qmp(result, 'return[0]/fdset-id', 2) + self.assert_qmp(result, 'return[1]/fdset-id', 1) + self.assert_qmp(result, 'return[2]/fdset-id', 0) + self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image4:r') + self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r') + self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+') + self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r') + self.vm.shutdown() + + def test_remove_fd_invalid_fdset(self): + result = self.vm.qmp('query-fdsets') + fd_image3 = result['return'][0]['fds'][0]['fd'] + result = self.vm.qmp('remove-fd', fdset_id=3, fd=fd_image3) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'File descriptor named \'fdset-id:3, fd:%d\' not found' % fd_image3) + self.vm.shutdown() + + def test_remove_fd_invalid_fd(self): + result = self.vm.qmp('query-fdsets') + result = self.vm.qmp('remove-fd', fdset_id=2, fd=999) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'File descriptor named \'fdset-id:2, fd:999\' not found') + self.vm.shutdown() + + def test_add_fd_invalid_fd(self): + result = self.vm.qmp('add-fd', fdset_id=2) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', + 'No file descriptor supplied via SCM_RIGHTS') + self.vm.shutdown() + +if __name__ == '__main__': + iotests.main(supported_fmts=['raw']) diff --git a/tests/qemu-iotests/045.out b/tests/qemu-iotests/045.out new file mode 100644 index 0000000000..3f8a935a08 --- /dev/null +++ b/tests/qemu-iotests/045.out @@ -0,0 +1,5 @@ +...... +---------------------------------------------------------------------- +Ran 6 tests + +OK diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046 new file mode 100755 index 0000000000..e0176f42df --- /dev/null +++ b/tests/qemu-iotests/046 @@ -0,0 +1,215 @@ +#!/bin/bash +# +# Test concurrent cluster allocations +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt qcow2 +_supported_proto generic +_supported_os Linux + +CLUSTER_SIZE=64k +size=128M + +echo +echo "== creating backing file for COW tests ==" + +_make_test_img $size + +function backing_io() +{ + local offset=$1 + local sectors=$2 + local op=$3 + local pattern=0 + local cur_sec=0 + + for i in $(seq 0 $((sectors - 1))); do + cur_sec=$((offset / 65536 + i)) + pattern=$(( ( (cur_sec % 128) + (cur_sec / 128)) % 128 )) + + echo "$op -P $pattern $((cur_sec * 64))k 64k" + done +} + +backing_io 0 16 write | $QEMU_IO $TEST_IMG | _filter_qemu_io + +mv $TEST_IMG $TEST_IMG.base + +_make_test_img -b $TEST_IMG.base 6G + +echo +echo "== Some concurrent requests touching the same cluster ==" + +function overlay_io() +{ +# Allocate middle of cluster 1, then write to somewhere before and after it +cat <<EOF +break write_aio A +aio_write -P 10 0x18000 0x2000 +wait_break A + +aio_write -P 11 0x12000 0x2000 +aio_write -P 12 0x1c000 0x2000 + +resume A +aio_flush +EOF + +# Sequential write case: Alloc middle of cluster 2, then write overlapping +# to next cluster +cat <<EOF +break write_aio A +aio_write -P 20 0x28000 0x2000 +wait_break A +aio_write -P 21 0x2a000 0x10000 +resume A +aio_flush +EOF + +# The same with a gap between both requests +cat <<EOF +break write_aio A +aio_write -P 40 0x48000 0x2000 +wait_break A +aio_write -P 41 0x4c000 0x10000 +resume A +aio_flush +EOF + +# Sequential write, but the next cluster is already allocated +cat <<EOF +write -P 70 0x76000 0x8000 +aio_flush +break write_aio A +aio_write -P 60 0x66000 0x2000 +wait_break A +aio_write -P 61 0x6a000 0xe000 +resume A +aio_flush +EOF + +# Sequential write, but the next cluster is already allocated +# and phyiscally in the right position +cat <<EOF +write -P 89 0x80000 0x1000 +write -P 90 0x96000 0x8000 +aio_flush +discard 0x80000 0x10000 +aio_flush +break write_aio A +aio_write -P 80 0x86000 0x2000 +wait_break A +aio_write -P 81 0x8a000 0xe000 +resume A +aio_flush +EOF + +# Sequential write, and the next cluster is compressed +cat <<EOF +write -P 109 0xa0000 0x1000 +write -c -P 110 0xb0000 0x10000 +aio_flush +discard 0xa0000 0x10000 +aio_flush +break write_aio A +aio_write -P 100 0xa6000 0x2000 +wait_break A +aio_write -P 101 0xaa000 0xe000 +resume A +aio_flush +EOF +} + +overlay_io | $QEMU_IO blkdebug::$TEST_IMG | _filter_qemu_io |\ + sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g' + +echo +echo "== Verify image content ==" + +function verify_io() +{ + echo read -P 0 0 0x10000 + + echo read -P 1 0x10000 0x2000 + echo read -P 11 0x12000 0x2000 + echo read -P 1 0x14000 0x4000 + echo read -P 10 0x18000 0x2000 + echo read -P 1 0x1a000 0x2000 + echo read -P 12 0x1c000 0x2000 + echo read -P 1 0x1e000 0x2000 + + echo read -P 2 0x20000 0x8000 + echo read -P 20 0x28000 0x2000 + echo read -P 21 0x2a000 0x10000 + echo read -P 3 0x3a000 0x6000 + + echo read -P 4 0x40000 0x8000 + echo read -P 40 0x48000 0x2000 + echo read -P 4 0x4a000 0x2000 + echo read -P 41 0x4c000 0x10000 + echo read -P 5 0x5c000 0x4000 + + echo read -P 6 0x60000 0x6000 + echo read -P 60 0x66000 0x2000 + echo read -P 6 0x68000 0x2000 + echo read -P 61 0x6a000 0xe000 + echo read -P 70 0x78000 0x6000 + echo read -P 7 0x7e000 0x2000 + + echo read -P 8 0x80000 0x6000 + echo read -P 80 0x86000 0x2000 + echo read -P 8 0x88000 0x2000 + echo read -P 81 0x8a000 0xe000 + echo read -P 90 0x98000 0x6000 + echo read -P 9 0x9e000 0x2000 + + echo read -P 10 0xa0000 0x6000 + echo read -P 100 0xa6000 0x2000 + echo read -P 10 0xa8000 0x2000 + echo read -P 101 0xaa000 0xe000 + echo read -P 110 0xb8000 0x8000 +} + +verify_io | $QEMU_IO $TEST_IMG | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out new file mode 100644 index 0000000000..565360fe60 --- /dev/null +++ b/tests/qemu-iotests/046.out @@ -0,0 +1,163 @@ +QA output created by 046 + +== creating backing file for COW tests == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +qemu-io> wrote 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 131072 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 196608 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 262144 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 327680 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 393216 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 458752 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 524288 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 589824 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 655360 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 720896 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 786432 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 851968 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 917504 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset 983040 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' + +== Some concurrent requests touching the same cluster == +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 32768/32768 bytes at offset XXX +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 4096/4096 bytes at offset XXX +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 32768/32768 bytes at offset XXX +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 4096/4096 bytes at offset XXX +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> wrote 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A' +qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A' +qemu-io> wrote 8192/8192 bytes at offset XXX +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 57344/57344 bytes at offset XXX +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> +== Verify image content == +qemu-io> read 65536/65536 bytes at offset 0 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 65536 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 73728 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 81920 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 98304 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 106496 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 114688 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 122880 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 131072 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 163840 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 65536/65536 bytes at offset 172032 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 237568 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 262144 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 294912 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 303104 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 65536/65536 bytes at offset 311296 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 16384/16384 bytes at offset 376832 +16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 393216 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 417792 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 425984 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 434176 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 491520 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 516096 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 524288 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 548864 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 557056 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 565248 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 622592 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 647168 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 24576/24576 bytes at offset 655360 +24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 679936 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 8192/8192 bytes at offset 688128 +8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 57344/57344 bytes at offset 696320 +56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> read 32768/32768 bytes at offset 753664 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +qemu-io> No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index a4a9044f24..a0307de06b 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -51,3 +51,5 @@ 042 rw auto quick 043 rw auto backing 044 rw auto +045 rw auto +046 rw auto aio diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 0be5c7e13f..569ca3d804 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -79,6 +79,18 @@ class VM(object): self._num_drives += 1 return self + def add_fd(self, fd, fdset, opaque, opts=''): + '''Pass a file descriptor to the VM''' + options = ['fd=%d' % fd, + 'set=%d' % fdset, + 'opaque=%s' % opaque] + if opts: + options.append(opts) + + self._args.append('-add-fd') + self._args.append(','.join(options)) + return self + def launch(self): '''Launch the VM and establish a QMP connection''' devnull = open('/dev/null', 'rb') diff --git a/tests/test-aio.c b/tests/test-aio.c index f53c908707..a8a4f0c6a5 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -15,6 +15,14 @@ AioContext *ctx; +/* Wait until there are no more BHs or AIO requests */ +static void wait_for_aio(void) +{ + while (aio_poll(ctx, true)) { + /* Do nothing */ + } +} + /* Simple callbacks for testing. */ typedef struct { @@ -78,14 +86,6 @@ static void test_notify(void) g_assert(!aio_poll(ctx, false)); } -static void test_flush(void) -{ - g_assert(!aio_poll(ctx, false)); - aio_notify(ctx); - aio_flush(ctx); - g_assert(!aio_poll(ctx, false)); -} - static void test_bh_schedule(void) { BHTestData data = { .n = 0 }; @@ -116,7 +116,7 @@ static void test_bh_schedule10(void) g_assert(aio_poll(ctx, true)); g_assert_cmpint(data.n, ==, 2); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 10); g_assert(!aio_poll(ctx, false)); @@ -164,7 +164,7 @@ static void test_bh_delete_from_cb(void) qemu_bh_schedule(data1.bh); g_assert_cmpint(data1.n, ==, 0); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data1.n, ==, data1.max); g_assert(data1.bh == NULL); @@ -200,7 +200,7 @@ static void test_bh_delete_from_cb_many(void) g_assert_cmpint(data4.n, ==, 1); g_assert(data1.bh == NULL); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data1.n, ==, data1.max); g_assert_cmpint(data2.n, ==, data2.max); g_assert_cmpint(data3.n, ==, data3.max); @@ -219,7 +219,7 @@ static void test_bh_flush(void) qemu_bh_schedule(data.bh); g_assert_cmpint(data.n, ==, 0); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 1); g_assert(!aio_poll(ctx, false)); @@ -281,7 +281,7 @@ static void test_flush_event_notifier(void) g_assert_cmpint(data.active, ==, 9); g_assert(aio_poll(ctx, false)); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 10); g_assert_cmpint(data.active, ==, 0); g_assert(!aio_poll(ctx, false)); @@ -325,7 +325,7 @@ static void test_wait_event_notifier_noflush(void) g_assert_cmpint(data.n, ==, 2); event_notifier_set(&dummy.e); - aio_flush(ctx); + wait_for_aio(); g_assert_cmpint(data.n, ==, 2); g_assert_cmpint(dummy.n, ==, 1); g_assert_cmpint(dummy.active, ==, 0); @@ -346,7 +346,7 @@ static void test_wait_event_notifier_noflush(void) * - sometimes both the AioContext and the glib main loop wake * themselves up. Hence, some "g_assert(!aio_poll(ctx, false));" * are replaced by "while (g_main_context_iteration(NULL, false));". - * - there is no exact replacement for aio_flush's blocking wait. + * - there is no exact replacement for a blocking wait. * "while (g_main_context_iteration(NULL, true)" seems to work, * but it is not documented _why_ it works. For these tests a * non-blocking loop like "while (g_main_context_iteration(NULL, false)" @@ -637,7 +637,6 @@ int main(int argc, char **argv) g_test_init(&argc, &argv, NULL); g_test_add_func("/aio/notify", test_notify); - g_test_add_func("/aio/flush", test_flush); g_test_add_func("/aio/bh/schedule", test_bh_schedule); g_test_add_func("/aio/bh/schedule10", test_bh_schedule10); g_test_add_func("/aio/bh/cancel", test_bh_cancel); diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index fea0445fb4..ea8e676b0c 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -47,11 +47,19 @@ static void qemu_aio_wait_nonblocking(void) qemu_aio_wait(); } +/* Wait until all aio and bh activity has finished */ +static void qemu_aio_wait_all(void) +{ + while (qemu_aio_wait()) { + /* Do nothing */ + } +} + static void test_submit(void) { WorkerTestData data = { .n = 0 }; thread_pool_submit(worker_cb, &data); - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(data.n, ==, 1); } @@ -63,7 +71,7 @@ static void test_submit_aio(void) /* The callbacks are not called until after the first wait. */ active = 1; g_assert_cmpint(data.ret, ==, -EINPROGRESS); - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(active, ==, 0); g_assert_cmpint(data.n, ==, 1); g_assert_cmpint(data.ret, ==, 0); @@ -84,7 +92,7 @@ static void co_test_cb(void *opaque) data->ret = 0; active--; - /* The test continues in test_submit_co, after qemu_aio_flush... */ + /* The test continues in test_submit_co, after qemu_aio_wait_all... */ } static void test_submit_co(void) @@ -99,9 +107,9 @@ static void test_submit_co(void) g_assert_cmpint(active, ==, 1); g_assert_cmpint(data.ret, ==, -EINPROGRESS); - /* qemu_aio_flush will execute the rest of the coroutine. */ + /* qemu_aio_wait_all will execute the rest of the coroutine. */ - qemu_aio_flush(); + qemu_aio_wait_all(); /* Back here after the coroutine has finished. */ @@ -184,7 +192,7 @@ static void test_cancel(void) } /* Finish execution and execute any remaining callbacks. */ - qemu_aio_flush(); + qemu_aio_wait_all(); g_assert_cmpint(active, ==, 0); for (i = 0; i < 100; i++) { if (data[i].n == 3) { diff --git a/trace-events b/trace-events index 6c6cbf10fd..6cb450a993 100644 --- a/trace-events +++ b/trace-events @@ -1022,3 +1022,16 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested % spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u" spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" + +# hw/xics.c +xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x" +xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR %#"PRIx32"->%#"PRIx32 +xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr) "icp_eoi: server %d given XIRR %#"PRIx32" new XIRR %#"PRIx32 +xics_icp_irq(int server, int nr, uint8_t priority) "cpu %d trying to deliver irq %#"PRIx32" priority %#x" +xics_icp_raise(uint32_t xirr, uint8_t pending_priority) "raising IRQ new XIRR=%#x new pending priority=%#x" +xics_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]" +xics_masked_pending(void) "set_irq_msi: masked pending" +xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]" +xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x" +xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]" +xics_ics_eoi(int nr) "ics_eoi: irq %#x" diff --git a/translate-all.c b/translate-all.c index f22e3eedd2..164870a68c 100644 --- a/translate-all.c +++ b/translate-all.c @@ -16,6 +16,12 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +#ifdef _WIN32 +#include <windows.h> +#else +#include <sys/types.h> +#include <sys/mman.h> +#endif #include <stdarg.h> #include <stdlib.h> #include <stdio.h> @@ -24,15 +30,120 @@ #include "config.h" +#include "qemu-common.h" #define NO_CPU_IO_DEFS #include "cpu.h" #include "disas.h" #include "tcg.h" #include "qemu-timer.h" +#include "memory.h" +#include "exec-memory.h" +#if defined(CONFIG_USER_ONLY) +#include "qemu.h" +#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) +#include <sys/param.h> +#if __FreeBSD_version >= 700104 +#define HAVE_KINFO_GETVMMAP +#define sigqueue sigqueue_freebsd /* avoid redefinition */ +#include <sys/time.h> +#include <sys/proc.h> +#include <machine/profile.h> +#define _KERNEL +#include <sys/user.h> +#undef _KERNEL +#undef sigqueue +#include <libutil.h> +#endif +#endif +#endif + +#include "cputlb.h" +#include "translate-all.h" + +//#define DEBUG_TB_INVALIDATE +//#define DEBUG_FLUSH +/* make various TB consistency checks */ +//#define DEBUG_TB_CHECK + +#if !defined(CONFIG_USER_ONLY) +/* TB consistency checks only implemented for usermode emulation. */ +#undef DEBUG_TB_CHECK +#endif + +#define SMC_BITMAP_USE_THRESHOLD 10 + +/* Code generation and translation blocks */ +static TranslationBlock *tbs; +static int code_gen_max_blocks; +TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; +static int nb_tbs; +/* any access to the tbs or the page table must use this lock */ +spinlock_t tb_lock = SPIN_LOCK_UNLOCKED; + +uint8_t *code_gen_prologue; +static uint8_t *code_gen_buffer; +static size_t code_gen_buffer_size; +/* threshold to flush the translated code buffer */ +static size_t code_gen_buffer_max_size; +static uint8_t *code_gen_ptr; + +typedef struct PageDesc { + /* list of TBs intersecting this ram page */ + TranslationBlock *first_tb; + /* in order to optimize self modifying code, we count the number + of lookups we do to a given page to use a bitmap */ + unsigned int code_write_count; + uint8_t *code_bitmap; +#if defined(CONFIG_USER_ONLY) + unsigned long flags; +#endif +} PageDesc; + +/* In system mode we want L1_MAP to be based on ram offsets, + while in user mode we want it to be based on virtual addresses. */ +#if !defined(CONFIG_USER_ONLY) +#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS +# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS +#else +# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS +#endif +#else +# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS +#endif + +/* The bits remaining after N lower levels of page tables. */ +#define V_L1_BITS_REM \ + ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS) + +#if V_L1_BITS_REM < 4 +#define V_L1_BITS (V_L1_BITS_REM + L2_BITS) +#else +#define V_L1_BITS V_L1_BITS_REM +#endif + +#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS) + +#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) + +uintptr_t qemu_real_host_page_size; +uintptr_t qemu_host_page_size; +uintptr_t qemu_host_page_mask; + +/* This is a multi-level map on the virtual address space. + The bottom level has pointers to PageDesc. */ +static void *l1_map[V_L1_SIZE]; + +/* statistics */ +static int tb_flush_count; +static int tb_phys_invalidate_count; /* code generation context */ TCGContext tcg_ctx; +static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, + tb_page_addr_t phys_page2); +static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); + void cpu_gen_init(void) { tcg_context_init(&tcg_ctx); @@ -101,8 +212,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr /* The cpu state corresponding to 'searched_pc' is restored. */ -int cpu_restore_state(TranslationBlock *tb, - CPUArchState *env, uintptr_t searched_pc) +static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env, + uintptr_t searched_pc) { TCGContext *s = &tcg_ctx; int j; @@ -155,3 +266,1624 @@ int cpu_restore_state(TranslationBlock *tb, #endif return 0; } + +bool cpu_restore_state(CPUArchState *env, uintptr_t retaddr) +{ + TranslationBlock *tb; + + tb = tb_find_pc(retaddr); + if (tb) { + cpu_restore_state_from_tb(tb, env, retaddr); + return true; + } + return false; +} + +#ifdef _WIN32 +static inline void map_exec(void *addr, long size) +{ + DWORD old_protect; + VirtualProtect(addr, size, + PAGE_EXECUTE_READWRITE, &old_protect); +} +#else +static inline void map_exec(void *addr, long size) +{ + unsigned long start, end, page_size; + + page_size = getpagesize(); + start = (unsigned long)addr; + start &= ~(page_size - 1); + + end = (unsigned long)addr + size; + end += page_size - 1; + end &= ~(page_size - 1); + + mprotect((void *)start, end - start, + PROT_READ | PROT_WRITE | PROT_EXEC); +} +#endif + +static void page_init(void) +{ + /* NOTE: we can always suppose that qemu_host_page_size >= + TARGET_PAGE_SIZE */ +#ifdef _WIN32 + { + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + qemu_real_host_page_size = system_info.dwPageSize; + } +#else + qemu_real_host_page_size = getpagesize(); +#endif + if (qemu_host_page_size == 0) { + qemu_host_page_size = qemu_real_host_page_size; + } + if (qemu_host_page_size < TARGET_PAGE_SIZE) { + qemu_host_page_size = TARGET_PAGE_SIZE; + } + qemu_host_page_mask = ~(qemu_host_page_size - 1); + +#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) + { +#ifdef HAVE_KINFO_GETVMMAP + struct kinfo_vmentry *freep; + int i, cnt; + + freep = kinfo_getvmmap(getpid(), &cnt); + if (freep) { + mmap_lock(); + for (i = 0; i < cnt; i++) { + unsigned long startaddr, endaddr; + + startaddr = freep[i].kve_start; + endaddr = freep[i].kve_end; + if (h2g_valid(startaddr)) { + startaddr = h2g(startaddr) & TARGET_PAGE_MASK; + + if (h2g_valid(endaddr)) { + endaddr = h2g(endaddr); + page_set_flags(startaddr, endaddr, PAGE_RESERVED); + } else { +#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS + endaddr = ~0ul; + page_set_flags(startaddr, endaddr, PAGE_RESERVED); +#endif + } + } + } + free(freep); + mmap_unlock(); + } +#else + FILE *f; + + last_brk = (unsigned long)sbrk(0); + + f = fopen("/compat/linux/proc/self/maps", "r"); + if (f) { + mmap_lock(); + + do { + unsigned long startaddr, endaddr; + int n; + + n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr); + + if (n == 2 && h2g_valid(startaddr)) { + startaddr = h2g(startaddr) & TARGET_PAGE_MASK; + + if (h2g_valid(endaddr)) { + endaddr = h2g(endaddr); + } else { + endaddr = ~0ul; + } + page_set_flags(startaddr, endaddr, PAGE_RESERVED); + } + } while (!feof(f)); + + fclose(f); + mmap_unlock(); + } +#endif + } +#endif +} + +static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) +{ + PageDesc *pd; + void **lp; + int i; + +#if defined(CONFIG_USER_ONLY) + /* We can't use g_malloc because it may recurse into a locked mutex. */ +# define ALLOC(P, SIZE) \ + do { \ + P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \ + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \ + } while (0) +#else +# define ALLOC(P, SIZE) \ + do { P = g_malloc0(SIZE); } while (0) +#endif + + /* Level 1. Always allocated. */ + lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1)); + + /* Level 2..N-1. */ + for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) { + void **p = *lp; + + if (p == NULL) { + if (!alloc) { + return NULL; + } + ALLOC(p, sizeof(void *) * L2_SIZE); + *lp = p; + } + + lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1)); + } + + pd = *lp; + if (pd == NULL) { + if (!alloc) { + return NULL; + } + ALLOC(pd, sizeof(PageDesc) * L2_SIZE); + *lp = pd; + } + +#undef ALLOC + + return pd + (index & (L2_SIZE - 1)); +} + +static inline PageDesc *page_find(tb_page_addr_t index) +{ + return page_find_alloc(index, 0); +} + +#if !defined(CONFIG_USER_ONLY) +#define mmap_lock() do { } while (0) +#define mmap_unlock() do { } while (0) +#endif + +#if defined(CONFIG_USER_ONLY) +/* Currently it is not recommended to allocate big chunks of data in + user mode. It will change when a dedicated libc will be used. */ +/* ??? 64-bit hosts ought to have no problem mmaping data outside the + region in which the guest needs to run. Revisit this. */ +#define USE_STATIC_CODE_GEN_BUFFER +#endif + +/* ??? Should configure for this, not list operating systems here. */ +#if (defined(__linux__) \ + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ + || defined(__DragonFly__) || defined(__OpenBSD__) \ + || defined(__NetBSD__)) +# define USE_MMAP +#endif + +/* Minimum size of the code gen buffer. This number is randomly chosen, + but not so small that we can't have a fair number of TB's live. */ +#define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024) + +/* Maximum size of the code gen buffer we'd like to use. Unless otherwise + indicated, this is constrained by the range of direct branches on the + host cpu, as used by the TCG implementation of goto_tb. */ +#if defined(__x86_64__) +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) +#elif defined(__sparc__) +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024) +#elif defined(__arm__) +# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024) +#elif defined(__s390x__) + /* We have a +- 4GB range on the branches; leave some slop. */ +# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) +#else +# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) +#endif + +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) + +#define DEFAULT_CODE_GEN_BUFFER_SIZE \ + (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ + ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE) + +static inline size_t size_code_gen_buffer(size_t tb_size) +{ + /* Size the buffer. */ + if (tb_size == 0) { +#ifdef USE_STATIC_CODE_GEN_BUFFER + tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; +#else + /* ??? Needs adjustments. */ + /* ??? If we relax the requirement that CONFIG_USER_ONLY use the + static buffer, we could size this on RESERVED_VA, on the text + segment size of the executable, or continue to use the default. */ + tb_size = (unsigned long)(ram_size / 4); +#endif + } + if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { + tb_size = MIN_CODE_GEN_BUFFER_SIZE; + } + if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) { + tb_size = MAX_CODE_GEN_BUFFER_SIZE; + } + code_gen_buffer_size = tb_size; + return tb_size; +} + +#ifdef USE_STATIC_CODE_GEN_BUFFER +static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] + __attribute__((aligned(CODE_GEN_ALIGN))); + +static inline void *alloc_code_gen_buffer(void) +{ + map_exec(static_code_gen_buffer, code_gen_buffer_size); + return static_code_gen_buffer; +} +#elif defined(USE_MMAP) +static inline void *alloc_code_gen_buffer(void) +{ + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + uintptr_t start = 0; + void *buf; + + /* Constrain the position of the buffer based on the host cpu. + Note that these addresses are chosen in concert with the + addresses assigned in the relevant linker script file. */ +# if defined(__PIE__) || defined(__PIC__) + /* Don't bother setting a preferred location if we're building + a position-independent executable. We're more likely to get + an address near the main executable if we let the kernel + choose the address. */ +# elif defined(__x86_64__) && defined(MAP_32BIT) + /* Force the memory down into low memory with the executable. + Leave the choice of exact location with the kernel. */ + flags |= MAP_32BIT; + /* Cannot expect to map more than 800MB in low memory. */ + if (code_gen_buffer_size > 800u * 1024 * 1024) { + code_gen_buffer_size = 800u * 1024 * 1024; + } +# elif defined(__sparc__) + start = 0x40000000ul; +# elif defined(__s390x__) + start = 0x90000000ul; +# endif + + buf = mmap((void *)start, code_gen_buffer_size, + PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0); + return buf == MAP_FAILED ? NULL : buf; +} +#else +static inline void *alloc_code_gen_buffer(void) +{ + void *buf = g_malloc(code_gen_buffer_size); + + if (buf) { + map_exec(buf, code_gen_buffer_size); + } + return buf; +} +#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */ + +static inline void code_gen_alloc(size_t tb_size) +{ + code_gen_buffer_size = size_code_gen_buffer(tb_size); + code_gen_buffer = alloc_code_gen_buffer(); + if (code_gen_buffer == NULL) { + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); + exit(1); + } + + qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE); + + /* Steal room for the prologue at the end of the buffer. This ensures + (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches + from TB's to the prologue are going to be in range. It also means + that we don't need to mark (additional) portions of the data segment + as executable. */ + code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024; + code_gen_buffer_size -= 1024; + + code_gen_buffer_max_size = code_gen_buffer_size - + (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); + code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; + tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock)); +} + +/* Must be called before using the QEMU cpus. 'tb_size' is the size + (in bytes) allocated to the translation buffer. Zero means default + size. */ +void tcg_exec_init(unsigned long tb_size) +{ + cpu_gen_init(); + code_gen_alloc(tb_size); + code_gen_ptr = code_gen_buffer; + tcg_register_jit(code_gen_buffer, code_gen_buffer_size); + page_init(); +#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) + /* There's no guest base to take into account, so go ahead and + initialize the prologue now. */ + tcg_prologue_init(&tcg_ctx); +#endif +} + +bool tcg_enabled(void) +{ + return code_gen_buffer != NULL; +} + +/* Allocate a new translation block. Flush the translation buffer if + too many translation blocks or too much generated code. */ +static TranslationBlock *tb_alloc(target_ulong pc) +{ + TranslationBlock *tb; + + if (nb_tbs >= code_gen_max_blocks || + (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) { + return NULL; + } + tb = &tbs[nb_tbs++]; + tb->pc = pc; + tb->cflags = 0; + return tb; +} + +void tb_free(TranslationBlock *tb) +{ + /* In practice this is mostly used for single use temporary TB + Ignore the hard cases and just back up if this TB happens to + be the last one generated. */ + if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) { + code_gen_ptr = tb->tc_ptr; + nb_tbs--; + } +} + +static inline void invalidate_page_bitmap(PageDesc *p) +{ + if (p->code_bitmap) { + g_free(p->code_bitmap); + p->code_bitmap = NULL; + } + p->code_write_count = 0; +} + +/* Set to NULL all the 'first_tb' fields in all PageDescs. */ +static void page_flush_tb_1(int level, void **lp) +{ + int i; + + if (*lp == NULL) { + return; + } + if (level == 0) { + PageDesc *pd = *lp; + + for (i = 0; i < L2_SIZE; ++i) { + pd[i].first_tb = NULL; + invalidate_page_bitmap(pd + i); + } + } else { + void **pp = *lp; + + for (i = 0; i < L2_SIZE; ++i) { + page_flush_tb_1(level - 1, pp + i); + } + } +} + +static void page_flush_tb(void) +{ + int i; + + for (i = 0; i < V_L1_SIZE; i++) { + page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i); + } +} + +/* flush all the translation blocks */ +/* XXX: tb_flush is currently not thread safe */ +void tb_flush(CPUArchState *env1) +{ + CPUArchState *env; + +#if defined(DEBUG_FLUSH) + printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", + (unsigned long)(code_gen_ptr - code_gen_buffer), + nb_tbs, nb_tbs > 0 ? + ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0); +#endif + if ((unsigned long)(code_gen_ptr - code_gen_buffer) + > code_gen_buffer_size) { + cpu_abort(env1, "Internal error: code buffer overflow\n"); + } + nb_tbs = 0; + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); + } + + memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *)); + page_flush_tb(); + + code_gen_ptr = code_gen_buffer; + /* XXX: flush processor icache at this point if cache flush is + expensive */ + tb_flush_count++; +} + +#ifdef DEBUG_TB_CHECK + +static void tb_invalidate_check(target_ulong address) +{ + TranslationBlock *tb; + int i; + + address &= TARGET_PAGE_MASK; + for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + if (!(address + TARGET_PAGE_SIZE <= tb->pc || + address >= tb->pc + tb->size)) { + printf("ERROR invalidate: address=" TARGET_FMT_lx + " PC=%08lx size=%04x\n", + address, (long)tb->pc, tb->size); + } + } + } +} + +/* verify that all the pages have correct rights for code */ +static void tb_page_check(void) +{ + TranslationBlock *tb; + int i, flags1, flags2; + + for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { + for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + flags1 = page_get_flags(tb->pc); + flags2 = page_get_flags(tb->pc + tb->size - 1); + if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) { + printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n", + (long)tb->pc, tb->size, flags1, flags2); + } + } + } +} + +#endif + +/* invalidate one TB */ +static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb, + int next_offset) +{ + TranslationBlock *tb1; + + for (;;) { + tb1 = *ptb; + if (tb1 == tb) { + *ptb = *(TranslationBlock **)((char *)tb1 + next_offset); + break; + } + ptb = (TranslationBlock **)((char *)tb1 + next_offset); + } +} + +static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) +{ + TranslationBlock *tb1; + unsigned int n1; + + for (;;) { + tb1 = *ptb; + n1 = (uintptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + if (tb1 == tb) { + *ptb = tb1->page_next[n1]; + break; + } + ptb = &tb1->page_next[n1]; + } +} + +static inline void tb_jmp_remove(TranslationBlock *tb, int n) +{ + TranslationBlock *tb1, **ptb; + unsigned int n1; + + ptb = &tb->jmp_next[n]; + tb1 = *ptb; + if (tb1) { + /* find tb(n) in circular list */ + for (;;) { + tb1 = *ptb; + n1 = (uintptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + if (n1 == n && tb1 == tb) { + break; + } + if (n1 == 2) { + ptb = &tb1->jmp_first; + } else { + ptb = &tb1->jmp_next[n1]; + } + } + /* now we can suppress tb(n) from the list */ + *ptb = tb->jmp_next[n]; + + tb->jmp_next[n] = NULL; + } +} + +/* reset the jump entry 'n' of a TB so that it is not chained to + another TB */ +static inline void tb_reset_jump(TranslationBlock *tb, int n) +{ + tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n])); +} + +void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) +{ + CPUArchState *env; + PageDesc *p; + unsigned int h, n1; + tb_page_addr_t phys_pc; + TranslationBlock *tb1, *tb2; + + /* remove the TB from the hash list */ + phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); + h = tb_phys_hash_func(phys_pc); + tb_remove(&tb_phys_hash[h], tb, + offsetof(TranslationBlock, phys_hash_next)); + + /* remove the TB from the page list */ + if (tb->page_addr[0] != page_addr) { + p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS); + tb_page_remove(&p->first_tb, tb); + invalidate_page_bitmap(p); + } + if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { + p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS); + tb_page_remove(&p->first_tb, tb); + invalidate_page_bitmap(p); + } + + tb_invalidated_flag = 1; + + /* remove the TB from the hash list */ + h = tb_jmp_cache_hash_func(tb->pc); + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (env->tb_jmp_cache[h] == tb) { + env->tb_jmp_cache[h] = NULL; + } + } + + /* suppress this TB from the two jump lists */ + tb_jmp_remove(tb, 0); + tb_jmp_remove(tb, 1); + + /* suppress any remaining jumps to this TB */ + tb1 = tb->jmp_first; + for (;;) { + n1 = (uintptr_t)tb1 & 3; + if (n1 == 2) { + break; + } + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + tb2 = tb1->jmp_next[n1]; + tb_reset_jump(tb1, n1); + tb1->jmp_next[n1] = NULL; + tb1 = tb2; + } + tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ + + tb_phys_invalidate_count++; +} + +static inline void set_bits(uint8_t *tab, int start, int len) +{ + int end, mask, end1; + + end = start + len; + tab += start >> 3; + mask = 0xff << (start & 7); + if ((start & ~7) == (end & ~7)) { + if (start < end) { + mask &= ~(0xff << (end & 7)); + *tab |= mask; + } + } else { + *tab++ |= mask; + start = (start + 8) & ~7; + end1 = end & ~7; + while (start < end1) { + *tab++ = 0xff; + start += 8; + } + if (start < end) { + mask = ~(0xff << (end & 7)); + *tab |= mask; + } + } +} + +static void build_page_bitmap(PageDesc *p) +{ + int n, tb_start, tb_end; + TranslationBlock *tb; + + p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8); + + tb = p->first_tb; + while (tb != NULL) { + n = (uintptr_t)tb & 3; + tb = (TranslationBlock *)((uintptr_t)tb & ~3); + /* NOTE: this is subtle as a TB may span two physical pages */ + if (n == 0) { + /* NOTE: tb_end may be after the end of the page, but + it is not a problem */ + tb_start = tb->pc & ~TARGET_PAGE_MASK; + tb_end = tb_start + tb->size; + if (tb_end > TARGET_PAGE_SIZE) { + tb_end = TARGET_PAGE_SIZE; + } + } else { + tb_start = 0; + tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); + } + set_bits(p->code_bitmap, tb_start, tb_end - tb_start); + tb = tb->page_next[n]; + } +} + +TranslationBlock *tb_gen_code(CPUArchState *env, + target_ulong pc, target_ulong cs_base, + int flags, int cflags) +{ + TranslationBlock *tb; + uint8_t *tc_ptr; + tb_page_addr_t phys_pc, phys_page2; + target_ulong virt_page2; + int code_gen_size; + + phys_pc = get_page_addr_code(env, pc); + tb = tb_alloc(pc); + if (!tb) { + /* flush must be done */ + tb_flush(env); + /* cannot fail at this point */ + tb = tb_alloc(pc); + /* Don't forget to invalidate previous TB info. */ + tb_invalidated_flag = 1; + } + tc_ptr = code_gen_ptr; + tb->tc_ptr = tc_ptr; + tb->cs_base = cs_base; + tb->flags = flags; + tb->cflags = cflags; + cpu_gen_code(env, tb, &code_gen_size); + code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size + + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + + /* check next page if needed */ + virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; + phys_page2 = -1; + if ((pc & TARGET_PAGE_MASK) != virt_page2) { + phys_page2 = get_page_addr_code(env, virt_page2); + } + tb_link_page(tb, phys_pc, phys_page2); + return tb; +} + +/* + * Invalidate all TBs which intersect with the target physical address range + * [start;end[. NOTE: start and end may refer to *different* physical pages. + * 'is_cpu_write_access' should be true if called from a real cpu write + * access: the virtual CPU will exit the current TB if code is modified inside + * this TB. + */ +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access) +{ + while (start < end) { + tb_invalidate_phys_page_range(start, end, is_cpu_write_access); + start &= TARGET_PAGE_MASK; + start += TARGET_PAGE_SIZE; + } +} + +/* + * Invalidate all TBs which intersect with the target physical address range + * [start;end[. NOTE: start and end must refer to the *same* physical page. + * 'is_cpu_write_access' should be true if called from a real cpu write + * access: the virtual CPU will exit the current TB if code is modified inside + * this TB. + */ +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access) +{ + TranslationBlock *tb, *tb_next, *saved_tb; + CPUArchState *env = cpu_single_env; + tb_page_addr_t tb_start, tb_end; + PageDesc *p; + int n; +#ifdef TARGET_HAS_PRECISE_SMC + int current_tb_not_found = is_cpu_write_access; + TranslationBlock *current_tb = NULL; + int current_tb_modified = 0; + target_ulong current_pc = 0; + target_ulong current_cs_base = 0; + int current_flags = 0; +#endif /* TARGET_HAS_PRECISE_SMC */ + + p = page_find(start >> TARGET_PAGE_BITS); + if (!p) { + return; + } + if (!p->code_bitmap && + ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD && + is_cpu_write_access) { + /* build code bitmap */ + build_page_bitmap(p); + } + + /* we remove all the TBs in the range [start, end[ */ + /* XXX: see if in some cases it could be faster to invalidate all + the code */ + tb = p->first_tb; + while (tb != NULL) { + n = (uintptr_t)tb & 3; + tb = (TranslationBlock *)((uintptr_t)tb & ~3); + tb_next = tb->page_next[n]; + /* NOTE: this is subtle as a TB may span two physical pages */ + if (n == 0) { + /* NOTE: tb_end may be after the end of the page, but + it is not a problem */ + tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); + tb_end = tb_start + tb->size; + } else { + tb_start = tb->page_addr[1]; + tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK); + } + if (!(tb_end <= start || tb_start >= end)) { +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_not_found) { + current_tb_not_found = 0; + current_tb = NULL; + if (env->mem_io_pc) { + /* now we have a real cpu fault */ + current_tb = tb_find_pc(env->mem_io_pc); + } + } + if (current_tb == tb && + (current_tb->cflags & CF_COUNT_MASK) != 1) { + /* If we are modifying the current TB, we must stop + its execution. We could be more precise by checking + that the modification is after the current PC, but it + would require a specialized function to partially + restore the CPU state */ + + current_tb_modified = 1; + cpu_restore_state_from_tb(current_tb, env, env->mem_io_pc); + cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, + ¤t_flags); + } +#endif /* TARGET_HAS_PRECISE_SMC */ + /* we need to do that to handle the case where a signal + occurs while doing tb_phys_invalidate() */ + saved_tb = NULL; + if (env) { + saved_tb = env->current_tb; + env->current_tb = NULL; + } + tb_phys_invalidate(tb, -1); + if (env) { + env->current_tb = saved_tb; + if (env->interrupt_request && env->current_tb) { + cpu_interrupt(env, env->interrupt_request); + } + } + } + tb = tb_next; + } +#if !defined(CONFIG_USER_ONLY) + /* if no code remaining, no need to continue to use slow writes */ + if (!p->first_tb) { + invalidate_page_bitmap(p); + if (is_cpu_write_access) { + tlb_unprotect_code_phys(env, start, env->mem_io_vaddr); + } + } +#endif +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_modified) { + /* we generate a block containing just the instruction + modifying the memory. It will ensure that it cannot modify + itself */ + env->current_tb = NULL; + tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); + cpu_resume_from_signal(env, NULL); + } +#endif +} + +/* len must be <= 8 and start must be a multiple of len */ +void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) +{ + PageDesc *p; + int offset, b; + +#if 0 + if (1) { + qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n", + cpu_single_env->mem_io_vaddr, len, + cpu_single_env->eip, + cpu_single_env->eip + + (intptr_t)cpu_single_env->segs[R_CS].base); + } +#endif + p = page_find(start >> TARGET_PAGE_BITS); + if (!p) { + return; + } + if (p->code_bitmap) { + offset = start & ~TARGET_PAGE_MASK; + b = p->code_bitmap[offset >> 3] >> (offset & 7); + if (b & ((1 << len) - 1)) { + goto do_invalidate; + } + } else { + do_invalidate: + tb_invalidate_phys_page_range(start, start + len, 1); + } +} + +#if !defined(CONFIG_SOFTMMU) +static void tb_invalidate_phys_page(tb_page_addr_t addr, + uintptr_t pc, void *puc) +{ + TranslationBlock *tb; + PageDesc *p; + int n; +#ifdef TARGET_HAS_PRECISE_SMC + TranslationBlock *current_tb = NULL; + CPUArchState *env = cpu_single_env; + int current_tb_modified = 0; + target_ulong current_pc = 0; + target_ulong current_cs_base = 0; + int current_flags = 0; +#endif + + addr &= TARGET_PAGE_MASK; + p = page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + return; + } + tb = p->first_tb; +#ifdef TARGET_HAS_PRECISE_SMC + if (tb && pc != 0) { + current_tb = tb_find_pc(pc); + } +#endif + while (tb != NULL) { + n = (uintptr_t)tb & 3; + tb = (TranslationBlock *)((uintptr_t)tb & ~3); +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb == tb && + (current_tb->cflags & CF_COUNT_MASK) != 1) { + /* If we are modifying the current TB, we must stop + its execution. We could be more precise by checking + that the modification is after the current PC, but it + would require a specialized function to partially + restore the CPU state */ + + current_tb_modified = 1; + cpu_restore_state_from_tb(current_tb, env, pc); + cpu_get_tb_cpu_state(env, ¤t_pc, ¤t_cs_base, + ¤t_flags); + } +#endif /* TARGET_HAS_PRECISE_SMC */ + tb_phys_invalidate(tb, addr); + tb = tb->page_next[n]; + } + p->first_tb = NULL; +#ifdef TARGET_HAS_PRECISE_SMC + if (current_tb_modified) { + /* we generate a block containing just the instruction + modifying the memory. It will ensure that it cannot modify + itself */ + env->current_tb = NULL; + tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); + cpu_resume_from_signal(env, puc); + } +#endif +} +#endif + +/* add the tb in the target page and protect it if necessary */ +static inline void tb_alloc_page(TranslationBlock *tb, + unsigned int n, tb_page_addr_t page_addr) +{ + PageDesc *p; +#ifndef CONFIG_USER_ONLY + bool page_already_protected; +#endif + + tb->page_addr[n] = page_addr; + p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); + tb->page_next[n] = p->first_tb; +#ifndef CONFIG_USER_ONLY + page_already_protected = p->first_tb != NULL; +#endif + p->first_tb = (TranslationBlock *)((uintptr_t)tb | n); + invalidate_page_bitmap(p); + +#if defined(TARGET_HAS_SMC) || 1 + +#if defined(CONFIG_USER_ONLY) + if (p->flags & PAGE_WRITE) { + target_ulong addr; + PageDesc *p2; + int prot; + + /* force the host page as non writable (writes will have a + page fault + mprotect overhead) */ + page_addr &= qemu_host_page_mask; + prot = 0; + for (addr = page_addr; addr < page_addr + qemu_host_page_size; + addr += TARGET_PAGE_SIZE) { + + p2 = page_find(addr >> TARGET_PAGE_BITS); + if (!p2) { + continue; + } + prot |= p2->flags; + p2->flags &= ~PAGE_WRITE; + } + mprotect(g2h(page_addr), qemu_host_page_size, + (prot & PAGE_BITS) & ~PAGE_WRITE); +#ifdef DEBUG_TB_INVALIDATE + printf("protecting code page: 0x" TARGET_FMT_lx "\n", + page_addr); +#endif + } +#else + /* if some code is already present, then the pages are already + protected. So we handle the case where only the first TB is + allocated in a physical page */ + if (!page_already_protected) { + tlb_protect_code(page_addr); + } +#endif + +#endif /* TARGET_HAS_SMC */ +} + +/* add a new TB and link it to the physical page tables. phys_page2 is + (-1) to indicate that only one page contains the TB. */ +static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, + tb_page_addr_t phys_page2) +{ + unsigned int h; + TranslationBlock **ptb; + + /* Grab the mmap lock to stop another thread invalidating this TB + before we are done. */ + mmap_lock(); + /* add in the physical hash table */ + h = tb_phys_hash_func(phys_pc); + ptb = &tb_phys_hash[h]; + tb->phys_hash_next = *ptb; + *ptb = tb; + + /* add in the page list */ + tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK); + if (phys_page2 != -1) { + tb_alloc_page(tb, 1, phys_page2); + } else { + tb->page_addr[1] = -1; + } + + tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); + tb->jmp_next[0] = NULL; + tb->jmp_next[1] = NULL; + + /* init original jump addresses */ + if (tb->tb_next_offset[0] != 0xffff) { + tb_reset_jump(tb, 0); + } + if (tb->tb_next_offset[1] != 0xffff) { + tb_reset_jump(tb, 1); + } + +#ifdef DEBUG_TB_CHECK + tb_page_check(); +#endif + mmap_unlock(); +} + +#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU) +/* check whether the given addr is in TCG generated code buffer or not */ +bool is_tcg_gen_code(uintptr_t tc_ptr) +{ + /* This can be called during code generation, code_gen_buffer_max_size + is used instead of code_gen_ptr for upper boundary checking */ + return (tc_ptr >= (uintptr_t)code_gen_buffer && + tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size)); +} +#endif + +/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < + tb[1].tc_ptr. Return NULL if not found */ +static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) +{ + int m_min, m_max, m; + uintptr_t v; + TranslationBlock *tb; + + if (nb_tbs <= 0) { + return NULL; + } + if (tc_ptr < (uintptr_t)code_gen_buffer || + tc_ptr >= (uintptr_t)code_gen_ptr) { + return NULL; + } + /* binary search (cf Knuth) */ + m_min = 0; + m_max = nb_tbs - 1; + while (m_min <= m_max) { + m = (m_min + m_max) >> 1; + tb = &tbs[m]; + v = (uintptr_t)tb->tc_ptr; + if (v == tc_ptr) { + return tb; + } else if (tc_ptr < v) { + m_max = m - 1; + } else { + m_min = m + 1; + } + } + return &tbs[m_max]; +} + +static void tb_reset_jump_recursive(TranslationBlock *tb); + +static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n) +{ + TranslationBlock *tb1, *tb_next, **ptb; + unsigned int n1; + + tb1 = tb->jmp_next[n]; + if (tb1 != NULL) { + /* find head of list */ + for (;;) { + n1 = (uintptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + if (n1 == 2) { + break; + } + tb1 = tb1->jmp_next[n1]; + } + /* we are now sure now that tb jumps to tb1 */ + tb_next = tb1; + + /* remove tb from the jmp_first list */ + ptb = &tb_next->jmp_first; + for (;;) { + tb1 = *ptb; + n1 = (uintptr_t)tb1 & 3; + tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); + if (n1 == n && tb1 == tb) { + break; + } + ptb = &tb1->jmp_next[n1]; + } + *ptb = tb->jmp_next[n]; + tb->jmp_next[n] = NULL; + + /* suppress the jump to next tb in generated code */ + tb_reset_jump(tb, n); + + /* suppress jumps in the tb on which we could have jumped */ + tb_reset_jump_recursive(tb_next); + } +} + +static void tb_reset_jump_recursive(TranslationBlock *tb) +{ + tb_reset_jump_recursive2(tb, 0); + tb_reset_jump_recursive2(tb, 1); +} + +#if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY) +void tb_invalidate_phys_addr(hwaddr addr) +{ + ram_addr_t ram_addr; + MemoryRegionSection *section; + + section = phys_page_find(address_space_memory.dispatch, + addr >> TARGET_PAGE_BITS); + if (!(memory_region_is_ram(section->mr) + || (section->mr->rom_device && section->mr->readable))) { + return; + } + ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK) + + memory_region_section_addr(section, addr); + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); +} +#endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */ + +void cpu_unlink_tb(CPUArchState *env) +{ + /* FIXME: TB unchaining isn't SMP safe. For now just ignore the + problem and hope the cpu will stop of its own accord. For userspace + emulation this often isn't actually as bad as it sounds. Often + signals are used primarily to interrupt blocking syscalls. */ + TranslationBlock *tb; + static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED; + + spin_lock(&interrupt_lock); + tb = env->current_tb; + /* if the cpu is currently executing code, we must unlink it and + all the potentially executing TB */ + if (tb) { + env->current_tb = NULL; + tb_reset_jump_recursive(tb); + } + spin_unlock(&interrupt_lock); +} + +void tb_check_watchpoint(CPUArchState *env) +{ + TranslationBlock *tb; + + tb = tb_find_pc(env->mem_io_pc); + if (!tb) { + cpu_abort(env, "check_watchpoint: could not find TB for pc=%p", + (void *)env->mem_io_pc); + } + cpu_restore_state_from_tb(tb, env, env->mem_io_pc); + tb_phys_invalidate(tb, -1); +} + +#ifndef CONFIG_USER_ONLY +/* mask must never be zero, except for A20 change call */ +static void tcg_handle_interrupt(CPUArchState *env, int mask) +{ + CPUState *cpu = ENV_GET_CPU(env); + int old_mask; + + old_mask = env->interrupt_request; + env->interrupt_request |= mask; + + /* + * If called from iothread context, wake the target cpu in + * case its halted. + */ + if (!qemu_cpu_is_self(cpu)) { + qemu_cpu_kick(cpu); + return; + } + + if (use_icount) { + env->icount_decr.u16.high = 0xffff; + if (!can_do_io(env) + && (mask & ~old_mask) != 0) { + cpu_abort(env, "Raised interrupt while not in I/O function"); + } + } else { + cpu_unlink_tb(env); + } +} + +CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt; + +/* in deterministic execution mode, instructions doing device I/Os + must be at the end of the TB */ +void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr) +{ + TranslationBlock *tb; + uint32_t n, cflags; + target_ulong pc, cs_base; + uint64_t flags; + + tb = tb_find_pc(retaddr); + if (!tb) { + cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", + (void *)retaddr); + } + n = env->icount_decr.u16.low + tb->icount; + cpu_restore_state_from_tb(tb, env, retaddr); + /* Calculate how many instructions had been executed before the fault + occurred. */ + n = n - env->icount_decr.u16.low; + /* Generate a new TB ending on the I/O insn. */ + n++; + /* On MIPS and SH, delay slot instructions can only be restarted if + they were already the first instruction in the TB. If this is not + the first instruction in a TB then re-execute the preceding + branch. */ +#if defined(TARGET_MIPS) + if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) { + env->active_tc.PC -= 4; + env->icount_decr.u16.low++; + env->hflags &= ~MIPS_HFLAG_BMASK; + } +#elif defined(TARGET_SH4) + if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 + && n > 1) { + env->pc -= 2; + env->icount_decr.u16.low++; + env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); + } +#endif + /* This should never happen. */ + if (n > CF_COUNT_MASK) { + cpu_abort(env, "TB too big during recompile"); + } + + cflags = n | CF_LAST_IO; + pc = tb->pc; + cs_base = tb->cs_base; + flags = tb->flags; + tb_phys_invalidate(tb, -1); + /* FIXME: In theory this could raise an exception. In practice + we have already translated the block once so it's probably ok. */ + tb_gen_code(env, pc, cs_base, flags, cflags); + /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not + the first in the TB) then we end up generating a whole new TB and + repeating the fault, which is horribly inefficient. + Better would be to execute just this insn uncached, or generate a + second new TB. */ + cpu_resume_from_signal(env, NULL); +} + +void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr) +{ + unsigned int i; + + /* Discard jump cache entries for any tb which might potentially + overlap the flushed page. */ + i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE); + memset(&env->tb_jmp_cache[i], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + + i = tb_jmp_cache_hash_page(addr); + memset(&env->tb_jmp_cache[i], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); +} + +void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) +{ + int i, target_code_size, max_target_code_size; + int direct_jmp_count, direct_jmp2_count, cross_page; + TranslationBlock *tb; + + target_code_size = 0; + max_target_code_size = 0; + cross_page = 0; + direct_jmp_count = 0; + direct_jmp2_count = 0; + for (i = 0; i < nb_tbs; i++) { + tb = &tbs[i]; + target_code_size += tb->size; + if (tb->size > max_target_code_size) { + max_target_code_size = tb->size; + } + if (tb->page_addr[1] != -1) { + cross_page++; + } + if (tb->tb_next_offset[0] != 0xffff) { + direct_jmp_count++; + if (tb->tb_next_offset[1] != 0xffff) { + direct_jmp2_count++; + } + } + } + /* XXX: avoid using doubles ? */ + cpu_fprintf(f, "Translation buffer state:\n"); + cpu_fprintf(f, "gen code size %td/%zd\n", + code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size); + cpu_fprintf(f, "TB count %d/%d\n", + nb_tbs, code_gen_max_blocks); + cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", + nb_tbs ? target_code_size / nb_tbs : 0, + max_target_code_size); + cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", + nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0, + target_code_size ? (double) (code_gen_ptr - code_gen_buffer) + / target_code_size : 0); + cpu_fprintf(f, "cross page TB count %d (%d%%)\n", + cross_page, + nb_tbs ? (cross_page * 100) / nb_tbs : 0); + cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", + direct_jmp_count, + nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0, + direct_jmp2_count, + nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0); + cpu_fprintf(f, "\nStatistics:\n"); + cpu_fprintf(f, "TB flush count %d\n", tb_flush_count); + cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count); + cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); + tcg_dump_info(f, cpu_fprintf); +} + +#else /* CONFIG_USER_ONLY */ + +void cpu_interrupt(CPUArchState *env, int mask) +{ + env->interrupt_request |= mask; + cpu_unlink_tb(env); +} + +/* + * Walks guest process memory "regions" one by one + * and calls callback function 'fn' for each region. + */ +struct walk_memory_regions_data { + walk_memory_regions_fn fn; + void *priv; + uintptr_t start; + int prot; +}; + +static int walk_memory_regions_end(struct walk_memory_regions_data *data, + abi_ulong end, int new_prot) +{ + if (data->start != -1ul) { + int rc = data->fn(data->priv, data->start, end, data->prot); + if (rc != 0) { + return rc; + } + } + + data->start = (new_prot ? end : -1ul); + data->prot = new_prot; + + return 0; +} + +static int walk_memory_regions_1(struct walk_memory_regions_data *data, + abi_ulong base, int level, void **lp) +{ + abi_ulong pa; + int i, rc; + + if (*lp == NULL) { + return walk_memory_regions_end(data, base, 0); + } + + if (level == 0) { + PageDesc *pd = *lp; + + for (i = 0; i < L2_SIZE; ++i) { + int prot = pd[i].flags; + + pa = base | (i << TARGET_PAGE_BITS); + if (prot != data->prot) { + rc = walk_memory_regions_end(data, pa, prot); + if (rc != 0) { + return rc; + } + } + } + } else { + void **pp = *lp; + + for (i = 0; i < L2_SIZE; ++i) { + pa = base | ((abi_ulong)i << + (TARGET_PAGE_BITS + L2_BITS * level)); + rc = walk_memory_regions_1(data, pa, level - 1, pp + i); + if (rc != 0) { + return rc; + } + } + } + + return 0; +} + +int walk_memory_regions(void *priv, walk_memory_regions_fn fn) +{ + struct walk_memory_regions_data data; + uintptr_t i; + + data.fn = fn; + data.priv = priv; + data.start = -1ul; + data.prot = 0; + + for (i = 0; i < V_L1_SIZE; i++) { + int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT, + V_L1_SHIFT / L2_BITS - 1, l1_map + i); + + if (rc != 0) { + return rc; + } + } + + return walk_memory_regions_end(&data, 0, 0); +} + +static int dump_region(void *priv, abi_ulong start, + abi_ulong end, unsigned long prot) +{ + FILE *f = (FILE *)priv; + + (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx + " "TARGET_ABI_FMT_lx" %c%c%c\n", + start, end, end - start, + ((prot & PAGE_READ) ? 'r' : '-'), + ((prot & PAGE_WRITE) ? 'w' : '-'), + ((prot & PAGE_EXEC) ? 'x' : '-')); + + return 0; +} + +/* dump memory mappings */ +void page_dump(FILE *f) +{ + (void) fprintf(f, "%-8s %-8s %-8s %s\n", + "start", "end", "size", "prot"); + walk_memory_regions(f, dump_region); +} + +int page_get_flags(target_ulong address) +{ + PageDesc *p; + + p = page_find(address >> TARGET_PAGE_BITS); + if (!p) { + return 0; + } + return p->flags; +} + +/* Modify the flags of a page and invalidate the code if necessary. + The flag PAGE_WRITE_ORG is positioned automatically depending + on PAGE_WRITE. The mmap_lock should already be held. */ +void page_set_flags(target_ulong start, target_ulong end, int flags) +{ + target_ulong addr, len; + + /* This function should never be called with addresses outside the + guest address space. If this assert fires, it probably indicates + a missing call to h2g_valid. */ +#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS + assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); +#endif + assert(start < end); + + start = start & TARGET_PAGE_MASK; + end = TARGET_PAGE_ALIGN(end); + + if (flags & PAGE_WRITE) { + flags |= PAGE_WRITE_ORG; + } + + for (addr = start, len = end - start; + len != 0; + len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { + PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1); + + /* If the write protection bit is set, then we invalidate + the code inside. */ + if (!(p->flags & PAGE_WRITE) && + (flags & PAGE_WRITE) && + p->first_tb) { + tb_invalidate_phys_page(addr, 0, NULL); + } + p->flags = flags; + } +} + +int page_check_range(target_ulong start, target_ulong len, int flags) +{ + PageDesc *p; + target_ulong end; + target_ulong addr; + + /* This function should never be called with addresses outside the + guest address space. If this assert fires, it probably indicates + a missing call to h2g_valid. */ +#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS + assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS)); +#endif + + if (len == 0) { + return 0; + } + if (start + len - 1 < start) { + /* We've wrapped around. */ + return -1; + } + + /* must do before we loose bits in the next step */ + end = TARGET_PAGE_ALIGN(start + len); + start = start & TARGET_PAGE_MASK; + + for (addr = start, len = end - start; + len != 0; + len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) { + p = page_find(addr >> TARGET_PAGE_BITS); + if (!p) { + return -1; + } + if (!(p->flags & PAGE_VALID)) { + return -1; + } + + if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) { + return -1; + } + if (flags & PAGE_WRITE) { + if (!(p->flags & PAGE_WRITE_ORG)) { + return -1; + } + /* unprotect the page if it was put read-only because it + contains translated code */ + if (!(p->flags & PAGE_WRITE)) { + if (!page_unprotect(addr, 0, NULL)) { + return -1; + } + } + return 0; + } + } + return 0; +} + +/* called from signal handler: invalidate the code and unprotect the + page. Return TRUE if the fault was successfully handled. */ +int page_unprotect(target_ulong address, uintptr_t pc, void *puc) +{ + unsigned int prot; + PageDesc *p; + target_ulong host_start, host_end, addr; + + /* Technically this isn't safe inside a signal handler. However we + know this only ever happens in a synchronous SEGV handler, so in + practice it seems to be ok. */ + mmap_lock(); + + p = page_find(address >> TARGET_PAGE_BITS); + if (!p) { + mmap_unlock(); + return 0; + } + + /* if the page was really writable, then we change its + protection back to writable */ + if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) { + host_start = address & qemu_host_page_mask; + host_end = host_start + qemu_host_page_size; + + prot = 0; + for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) { + p = page_find(addr >> TARGET_PAGE_BITS); + p->flags |= PAGE_WRITE; + prot |= p->flags; + + /* and since the content will be modified, we must invalidate + the corresponding translated code. */ + tb_invalidate_phys_page(addr, pc, puc); +#ifdef DEBUG_TB_CHECK + tb_invalidate_check(addr); +#endif + } + mprotect((void *)g2h(host_start), qemu_host_page_size, + prot & PAGE_BITS); + + mmap_unlock(); + return 1; + } + mmap_unlock(); + return 0; +} +#endif /* CONFIG_USER_ONLY */ diff --git a/translate-all.h b/translate-all.h new file mode 100644 index 0000000000..b181fb48ad --- /dev/null +++ b/translate-all.h @@ -0,0 +1,34 @@ +/* + * Translated block handling + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef TRANSLATE_ALL_H +#define TRANSLATE_ALL_H + +/* Size of the L2 (and L3, etc) page tables. */ +#define L2_BITS 10 +#define L2_SIZE (1 << L2_BITS) + +#define P_L2_LEVELS \ + (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1) + +/* translate-all.c */ +void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); +void cpu_unlink_tb(CPUArchState *env); +void tb_check_watchpoint(CPUArchState *env); + +#endif /* TRANSLATE_ALL_H */ diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c index 9ae4cabffc..62d0fde77f 100644 --- a/ui/vnc-enc-tight.c +++ b/ui/vnc-enc-tight.c @@ -1212,7 +1212,7 @@ static int send_jpeg_rect(VncState *vs, int x, int y, int w, int h, int quality) buf = (uint8_t *)pixman_image_get_data(linebuf); row[0] = buf; for (dy = 0; dy < h; dy++) { - qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy); + qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy); jpeg_write_scanlines(&cinfo, row, 1); } qemu_pixman_image_unref(linebuf); @@ -1356,7 +1356,7 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h, if (color_type == PNG_COLOR_TYPE_PALETTE) { memcpy(buf, vs->tight.tight.buffer + (dy * w), w); } else { - qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy); + qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy); } png_write_row(png_ptr, buf); } @@ -2569,7 +2569,7 @@ static int vnc_refresh_server_surface(VncDisplay *vd) uint8_t *server_ptr; if (vd->guest.format != VNC_SERVER_FB_FORMAT) { - qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, y); + qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y); guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf); } else { guest_ptr = guest_row; diff --git a/user-exec.c b/user-exec.c index ef9b1727b3..1185cb03c8 100644 --- a/user-exec.c +++ b/user-exec.c @@ -81,7 +81,6 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, int is_write, sigset_t *old_set, void *puc) { - TranslationBlock *tb; int ret; #if defined(DEBUG_SIGNAL) @@ -104,12 +103,7 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address, return 1; /* the MMU fault was handled without causing real CPU fault */ } /* now we have a real cpu fault */ - tb = tb_find_pc(pc); - if (tb) { - /* the PC is inside the translated code. It means that we have - a virtual CPU fault */ - cpu_restore_state(tb, cpu_single_env, pc); - } + cpu_restore_state(cpu_single_env, pc); /* we restore the process signal mask as the sigreturn should do it (XXX: use sigsetjmp) */ @@ -886,9 +886,9 @@ static int cleanup_add_fd(QemuOpts *opts, void *opaque) static int drive_init_func(QemuOpts *opts, void *opaque) { - int *use_scsi = opaque; + BlockInterfaceType *block_default_type = opaque; - return drive_init(opts, *use_scsi) == NULL; + return drive_init(opts, *block_default_type) == NULL; } static int drive_enable_snapshot(QemuOpts *opts, void *opaque) @@ -899,16 +899,11 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque) return 0; } -static void default_drive(int enable, int snapshot, int use_scsi, - BlockInterfaceType type, int index, - const char *optstr) +static void default_drive(int enable, int snapshot, BlockInterfaceType type, + int index, const char *optstr) { QemuOpts *opts; - if (type == IF_DEFAULT) { - type = use_scsi ? IF_SCSI : IF_IDE; - } - if (!enable || drive_get_by_index(type, index)) { return; } @@ -917,7 +912,7 @@ static void default_drive(int enable, int snapshot, int use_scsi, if (snapshot) { drive_enable_snapshot(opts, NULL); } - if (!drive_init(opts, use_scsi)) { + if (!drive_init(opts, type)) { exit(1); } } @@ -2001,7 +1996,7 @@ static int balloon_parse(const char *arg) return -1; } else { /* create empty opts */ - opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL); + opts = qemu_opts_create_nofail(qemu_find_opts("device")); } qemu_opt_set(opts, "driver", "virtio-balloon"); return 0; @@ -2251,14 +2246,14 @@ static int virtcon_parse(const char *devname) exit(1); } - bus_opts = qemu_opts_create(device, NULL, 0, NULL); + bus_opts = qemu_opts_create_nofail(device); if (arch_type == QEMU_ARCH_S390X) { qemu_opt_set(bus_opts, "driver", "virtio-serial-s390"); } else { qemu_opt_set(bus_opts, "driver", "virtio-serial-pci"); } - dev_opts = qemu_opts_create(device, NULL, 0, NULL); + dev_opts = qemu_opts_create_nofail(device); qemu_opt_set(dev_opts, "driver", "virtconsole"); snprintf(label, sizeof(label), "virtcon%d", index); @@ -3110,8 +3105,7 @@ int main(int argc, char **argv, char **envp) qemu_opt_set_bool(fsdev, "readonly", qemu_opt_get_bool(opts, "readonly", 0)); - device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, - NULL); + device = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(device, "driver", "virtio-9p-pci"); qemu_opt_set(device, "fsdev", qemu_opt_get(opts, "mount_tag")); @@ -3131,8 +3125,7 @@ int main(int argc, char **argv, char **envp) } qemu_opt_set(fsdev, "fsdriver", "synth"); - device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, - NULL); + device = qemu_opts_create_nofail(qemu_find_opts("device")); qemu_opt_set(device, "driver", "virtio-9p-pci"); qemu_opt_set(device, "fsdev", "v_synth"); qemu_opt_set(device, "mount_tag", "v_synth"); @@ -3770,15 +3763,15 @@ int main(int argc, char **argv, char **envp) /* open the virtual block devices */ if (snapshot) qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0); - if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0) + if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, + &machine->block_default_type, 1) != 0) { exit(1); + } - default_drive(default_cdrom, snapshot, machine->use_scsi, - IF_DEFAULT, 2, CDROM_OPTS); - default_drive(default_floppy, snapshot, machine->use_scsi, - IF_FLOPPY, 0, FD_OPTS); - default_drive(default_sdcard, snapshot, machine->use_scsi, - IF_SD, 0, SD_OPTS); + default_drive(default_cdrom, snapshot, machine->block_default_type, 2, + CDROM_OPTS); + default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS); + default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); |