125 files changed, 4341 insertions, 3333 deletions
diff --git a/Makefile.objs b/Makefile.objs
index 3c7abca433..20fb2c54f0 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -92,6 +92,8 @@ common-obj-$(CONFIG_SPICE) += spice-qemu-char.o
 
 common-obj-y += audio/
 common-obj-y += hw/
+extra-obj-y += hw/
+
 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
 
diff --git a/arch_init.c b/arch_init.c
index e307b23310..1645f3079a 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -568,7 +568,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
 
     migration_bitmap = bitmap_new(ram_pages);
-    bitmap_set(migration_bitmap, 1, ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages);
     migration_dirty_pages = ram_pages;
 
     bytes_transferred = 0;
@@ -840,7 +840,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
             memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
             if (ch == 0 &&
-                (!kvm_enabled() || kvm_has_sync_mmu())) {
+                (!kvm_enabled() || kvm_has_sync_mmu()) &&
+                getpagesize() <= TARGET_PAGE_SIZE) {
                 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
             }
 #endif
diff --git a/async.c b/async.c
index 3f0e8f367c..41ae0c1195 100644
--- a/async.c
+++ b/async.c
@@ -215,8 +215,3 @@ void aio_context_unref(AioContext *ctx)
 {
     g_source_unref(&ctx->source);
 }
-
-void aio_flush(AioContext *ctx)
-{
-    while (aio_poll(ctx, true));
-}
diff --git a/block.c b/block.c
index c05875fe39..0668c4be17 100644
--- a/block.c
+++ b/block.c
@@ -518,22 +518,16 @@ BlockDriver *bdrv_find_protocol(const char *filename)
     return NULL;
 }
 
-static int find_image_format(const char *filename, BlockDriver **pdrv)
+static int find_image_format(BlockDriverState *bs, const char *filename,
+                             BlockDriver **pdrv)
 {
-    int ret, score, score_max;
+    int score, score_max;
     BlockDriver *drv1, *drv;
     uint8_t buf[2048];
-    BlockDriverState *bs;
-
-    ret = bdrv_file_open(&bs, filename, 0);
-    if (ret < 0) {
-        *pdrv = NULL;
-        return ret;
-    }
+    int ret = 0;
 
     /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
     if (bs->sg || !bdrv_is_inserted(bs)) {
-        bdrv_delete(bs);
         drv = bdrv_find_format("raw");
         if (!drv) {
             ret = -ENOENT;
@@ -543,7 +537,6 @@ static int find_image_format(const char *filename, BlockDriver **pdrv)
     }
 
     ret = bdrv_pread(bs, 0, buf, sizeof(buf));
-    bdrv_delete(bs);
     if (ret < 0) {
         *pdrv = NULL;
         return ret;
@@ -634,10 +627,31 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
     bs->copy_on_read--;
 }
 
+static int bdrv_open_flags(BlockDriverState *bs, int flags)
+{
+    int open_flags = flags | BDRV_O_CACHE_WB;
+
+    /*
+     * Clear flags that are internal to the block layer before opening the
+     * image.
+     */
+    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+    /*
+     * Snapshots should be writable.
+     */
+    if (bs->is_temporary) {
+        open_flags |= BDRV_O_RDWR;
+    }
+
+    return open_flags;
+}
+
 /*
  * Common part for opening disk images and files
  */
-static int bdrv_open_common(BlockDriverState *bs, const char *filename,
+static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+    const char *filename,
     int flags, BlockDriver *drv)
 {
     int ret, open_flags;
@@ -665,31 +679,22 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     bs->opaque = g_malloc0(drv->instance_size);
 
     bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
-    open_flags = flags | BDRV_O_CACHE_WB;
-
-    /*
-     * Clear flags that are internal to the block layer before opening the
-     * image.
-     */
-    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-    /*
-     * Snapshots should be writable.
-     */
-    if (bs->is_temporary) {
-        open_flags |= BDRV_O_RDWR;
-    }
+    open_flags = bdrv_open_flags(bs, flags);
 
     bs->read_only = !(open_flags & BDRV_O_RDWR);
 
     /* Open the image, either directly or using a protocol */
     if (drv->bdrv_file_open) {
-        ret = drv->bdrv_file_open(bs, filename, open_flags);
-    } else {
-        ret = bdrv_file_open(&bs->file, filename, open_flags);
-        if (ret >= 0) {
-            ret = drv->bdrv_open(bs, open_flags);
+        if (file != NULL) {
+            bdrv_swap(file, bs);
+            ret = 0;
+        } else {
+            ret = drv->bdrv_file_open(bs, filename, open_flags);
         }
+    } else {
+        assert(file != NULL);
+        bs->file = file;
+        ret = drv->bdrv_open(bs, open_flags);
     }
 
     if (ret < 0) {
@@ -709,10 +714,7 @@ static int bdrv_open_common(BlockDriverState *bs, const char *filename,
     return 0;
 
 free_and_fail:
-    if (bs->file) {
-        bdrv_delete(bs->file);
-        bs->file = NULL;
-    }
+    bs->file = NULL;
     g_free(bs->opaque);
     bs->opaque = NULL;
     bs->drv = NULL;
@@ -734,7 +736,7 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
     }
 
     bs = bdrv_new("");
-    ret = bdrv_open_common(bs, filename, flags, drv);
+    ret = bdrv_open_common(bs, NULL, filename, flags, drv);
     if (ret < 0) {
         bdrv_delete(bs);
         return ret;
@@ -789,6 +791,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
     int ret;
     /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
     char tmp_filename[PATH_MAX + 1];
+    BlockDriverState *file = NULL;
 
     if (flags & BDRV_O_SNAPSHOT) {
         BlockDriverState *bs1;
@@ -848,25 +851,36 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
         bs->is_temporary = 1;
     }
 
+    /* Open image file without format layer */
+    if (flags & BDRV_O_RDWR) {
+        flags |= BDRV_O_ALLOW_RDWR;
+    }
+
+    ret = bdrv_file_open(&file, filename, bdrv_open_flags(bs, flags));
+    if (ret < 0) {
+        return ret;
+    }
+
     /* Find the right image format driver */
     if (!drv) {
-        ret = find_image_format(filename, &drv);
+        ret = find_image_format(file, filename, &drv);
     }
 
     if (!drv) {
         goto unlink_and_fail;
     }
 
-    if (flags & BDRV_O_RDWR) {
-        flags |= BDRV_O_ALLOW_RDWR;
-    }
-
     /* Open the image */
-    ret = bdrv_open_common(bs, filename, flags, drv);
+    ret = bdrv_open_common(bs, file, filename, flags, drv);
     if (ret < 0) {
         goto unlink_and_fail;
     }
 
+    if (bs->file != file) {
+        bdrv_delete(file);
+        file = NULL;
+    }
+
     /* If there is a backing file, use it */
     if ((flags & BDRV_O_NO_BACKING) == 0) {
         ret = bdrv_open_backing_file(bs);
@@ -888,6 +902,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
     return 0;
 
 unlink_and_fail:
+    if (file != NULL) {
+        bdrv_delete(file);
+    }
     if (bs->is_temporary) {
         unlink(filename);
     }
@@ -3028,7 +3045,46 @@ void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
     }
 
     drv->bdrv_debug_event(bs, event);
+}
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                          const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
+        bs = bs->file;
+    }
+
+    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
+        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
+    }
 
+    return -ENOTSUP;
+}
+
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_resume) {
+        bs = bs->file;
+    }
+
+    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
+        return bs->drv->bdrv_debug_resume(bs, tag);
+    }
+
+    return -ENOTSUP;
+}
+
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
+        bs = bs->file;
+    }
+
+    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
+        return bs->drv->bdrv_debug_is_suspended(bs, tag);
+    }
+
+    return false;
 }
 
 /**************************************************************/
@@ -3778,12 +3834,20 @@ typedef struct BlockDriverAIOCBCoroutine {
     BlockDriverAIOCB common;
     BlockRequest req;
     bool is_write;
+    bool *done;
     QEMUBH* bh;
 } BlockDriverAIOCBCoroutine;
 
 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
 {
-    qemu_aio_flush();
+    BlockDriverAIOCBCoroutine *acb =
+        container_of(blockacb, BlockDriverAIOCBCoroutine, common);
+    bool done = false;
+
+    acb->done = &done;
+    while (!done) {
+        qemu_aio_wait();
+    }
 }
 
 static const AIOCBInfo bdrv_em_co_aiocb_info = {
@@ -3796,6 +3860,11 @@ static void bdrv_co_em_bh(void *opaque)
     BlockDriverAIOCBCoroutine *acb = opaque;
 
     acb->common.cb(acb->common.opaque, acb->req.error);
+
+    if (acb->done) {
+        *acb->done = true;
+    }
+
     qemu_bh_delete(acb->bh);
     qemu_aio_release(acb);
 }
@@ -3834,6 +3903,7 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
     acb->req.nb_sectors = nb_sectors;
     acb->req.qiov = qiov;
     acb->is_write = is_write;
+    acb->done = NULL;
 
     co = qemu_coroutine_create(bdrv_co_do_rw);
     qemu_coroutine_enter(co, acb);
@@ -3860,6 +3930,8 @@ BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
     BlockDriverAIOCBCoroutine *acb;
 
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->done = NULL;
+
     co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
     qemu_coroutine_enter(co, acb);
 
@@ -3888,6 +3960,7 @@ BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
     acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
     acb->req.sector = sector_num;
     acb->req.nb_sectors = nb_sectors;
+    acb->done = NULL;
     co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
     qemu_coroutine_enter(co, acb);
 
@@ -4408,9 +4481,9 @@ bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
     bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
 }
 
-int bdrv_img_create(const char *filename, const char *fmt,
-                    const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags)
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags, Error **errp)
 {
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *backing_fmt, *backing_file, *size;
@@ -4422,16 +4495,14 @@ int bdrv_img_create(const char *filename, const char *fmt,
     /* Find driver and parse its options */
     drv = bdrv_find_format(fmt);
     if (!drv) {
-        error_report("Unknown file format '%s'", fmt);
-        ret = -EINVAL;
-        goto out;
+        error_setg(errp, "Unknown file format '%s'", fmt);
+        return;
     }
 
     proto_drv = bdrv_find_protocol(filename);
     if (!proto_drv) {
-        error_report("Unknown protocol '%s'", filename);
-        ret = -EINVAL;
-        goto out;
+        error_setg(errp, "Unknown protocol '%s'", filename);
+        return;
     }
 
     create_options = append_option_parameters(create_options,
@@ -4448,8 +4519,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (options) {
         param = parse_option_parameters(options, create_options, param);
         if (param == NULL) {
-            error_report("Invalid options for file format '%s'.", fmt);
-            ret = -EINVAL;
+            error_setg(errp, "Invalid options for file format '%s'.", fmt);
             goto out;
         }
     }
@@ -4457,18 +4527,16 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (base_filename) {
         if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                  base_filename)) {
-            error_report("Backing file not supported for file format '%s'",
-                         fmt);
-            ret = -EINVAL;
+            error_setg(errp, "Backing file not supported for file format '%s'",
+                       fmt);
             goto out;
         }
     }
 
     if (base_fmt) {
         if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
-            error_report("Backing file format not supported for file "
-                         "format '%s'", fmt);
-            ret = -EINVAL;
+            error_setg(errp, "Backing file format not supported for file "
+                             "format '%s'", fmt);
             goto out;
         }
     }
@@ -4476,9 +4544,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
     backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
     if (backing_file && backing_file->value.s) {
         if (!strcmp(filename, backing_file->value.s)) {
-            error_report("Error: Trying to create an image with the "
-                         "same filename as the backing file");
-            ret = -EINVAL;
+            error_setg(errp, "Error: Trying to create an image with the "
+                             "same filename as the backing file");
             goto out;
         }
     }
@@ -4487,9 +4554,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
     if (backing_fmt && backing_fmt->value.s) {
         backing_drv = bdrv_find_format(backing_fmt->value.s);
         if (!backing_drv) {
-            error_report("Unknown backing file format '%s'",
-                         backing_fmt->value.s);
-            ret = -EINVAL;
+            error_setg(errp, "Unknown backing file format '%s'",
+                       backing_fmt->value.s);
             goto out;
         }
     }
@@ -4511,7 +4577,8 @@ int bdrv_img_create(const char *filename, const char *fmt,
 
             ret = bdrv_open(bs, backing_file->value.s, back_flags, backing_drv);
             if (ret < 0) {
-                error_report("Could not open '%s'", backing_file->value.s);
+                error_setg_errno(errp, -ret, "Could not open '%s'",
+                                 backing_file->value.s);
                 goto out;
             }
             bdrv_get_geometry(bs, &size);
@@ -4520,8 +4587,7 @@ int bdrv_img_create(const char *filename, const char *fmt,
             snprintf(buf, sizeof(buf), "%" PRId64, size);
             set_option_parameter(param, BLOCK_OPT_SIZE, buf);
         } else {
-            error_report("Image creation needs a size parameter");
-            ret = -EINVAL;
+            error_setg(errp, "Image creation needs a size parameter");
             goto out;
         }
     }
@@ -4531,17 +4597,16 @@ int bdrv_img_create(const char *filename, const char *fmt,
     puts("");
 
     ret = bdrv_create(drv, filename, param);
-
     if (ret < 0) {
         if (ret == -ENOTSUP) {
-            error_report("Formatting or formatting option not supported for "
-                         "file format '%s'", fmt);
+            error_setg(errp,"Formatting or formatting option not supported for "
+                            "file format '%s'", fmt);
         } else if (ret == -EFBIG) {
-            error_report("The image size is too large for file format '%s'",
-                         fmt);
+            error_setg(errp, "The image size is too large for file format '%s'",
+                       fmt);
         } else {
-            error_report("%s: error while creating %s: %s", filename, fmt,
-                         strerror(-ret));
+            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
+                       strerror(-ret));
         }
     }
 
@@ -4552,6 +4617,4 @@ out:
     if (bs) {
         bdrv_delete(bs);
     }
-
-    return ret;
 }
diff --git a/block.h b/block.h
index 722c620590..893448a5fc 100644
--- a/block.h
+++ b/block.h
@@ -343,9 +343,9 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                       int64_t pos, int size);
 
-int bdrv_img_create(const char *filename, const char *fmt,
-                    const char *base_filename, const char *base_fmt,
-                    char *options, uint64_t img_size, int flags);
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags, Error **errp);
 
 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
@@ -431,4 +431,9 @@ typedef enum {
 #define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
 
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                           const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
 #endif
diff --git a/block/blkdebug.c b/block/blkdebug.c
index d61ece86a9..294e983306 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -29,8 +29,10 @@
 typedef struct BDRVBlkdebugState {
     int state;
     int new_state;
+
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
+    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
 } BDRVBlkdebugState;
 
 typedef struct BlkdebugAIOCB {
@@ -39,6 +41,12 @@ typedef struct BlkdebugAIOCB {
     int ret;
 } BlkdebugAIOCB;
 
+typedef struct BlkdebugSuspendedReq {
+    Coroutine *co;
+    char *tag;
+    QLIST_ENTRY(BlkdebugSuspendedReq) next;
+} BlkdebugSuspendedReq;
+
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb);
 
 static const AIOCBInfo blkdebug_aiocb_info = {
@@ -49,6 +57,7 @@ static const AIOCBInfo blkdebug_aiocb_info = {
 enum {
     ACTION_INJECT_ERROR,
     ACTION_SET_STATE,
+    ACTION_SUSPEND,
 };
 
 typedef struct BlkdebugRule {
@@ -65,6 +74,9 @@ typedef struct BlkdebugRule {
         struct {
             int new_state;
         } set_state;
+        struct {
+            char *tag;
+        } suspend;
     } options;
     QLIST_ENTRY(BlkdebugRule) next;
     QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
@@ -226,6 +238,11 @@ static int add_rule(QemuOpts *opts, void *opaque)
         rule->options.set_state.new_state =
             qemu_opt_get_number(opts, "new_state", 0);
         break;
+
+    case ACTION_SUSPEND:
+        rule->options.suspend.tag =
+            g_strdup(qemu_opt_get(opts, "tag"));
+        break;
     };
 
     /* Add the rule */
@@ -234,12 +251,32 @@ static int add_rule(QemuOpts *opts, void *opaque)
     return 0;
 }
 
+static void remove_rule(BlkdebugRule *rule)
+{
+    switch (rule->action) {
+    case ACTION_INJECT_ERROR:
+    case ACTION_SET_STATE:
+        break;
+    case ACTION_SUSPEND:
+        g_free(rule->options.suspend.tag);
+        break;
+    }
+
+    QLIST_REMOVE(rule, next);
+    g_free(rule);
+}
+
 static int read_config(BDRVBlkdebugState *s, const char *filename)
 {
     FILE *f;
     int ret;
     struct add_rule_data d;
 
+    /* Allow usage without config file */
+    if (!*filename) {
+        return 0;
+    }
+
     f = fopen(filename, "r");
     if (f == NULL) {
         return -errno;
@@ -389,6 +426,7 @@ static BlockDriverAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
     return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
 }
 
+
 static void blkdebug_close(BlockDriverState *bs)
 {
     BDRVBlkdebugState *s = bs->opaque;
@@ -397,12 +435,32 @@ static void blkdebug_close(BlockDriverState *bs)
 
     for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
         QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
-            QLIST_REMOVE(rule, next);
-            g_free(rule);
+            remove_rule(rule);
         }
     }
 }
 
+static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq r;
+
+    r = (BlkdebugSuspendedReq) {
+        .co         = qemu_coroutine_self(),
+        .tag        = g_strdup(rule->options.suspend.tag),
+    };
+
+    remove_rule(rule);
+    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
+
+    printf("blkdebug: Suspended request '%s'\n", r.tag);
+    qemu_coroutine_yield();
+    printf("blkdebug: Resuming request '%s'\n", r.tag);
+
+    QLIST_REMOVE(&r, next);
+    g_free(r.tag);
+}
+
 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
     bool injected)
 {
@@ -426,6 +484,10 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
     case ACTION_SET_STATE:
         s->new_state = rule->options.set_state.new_state;
         break;
+
+    case ACTION_SUSPEND:
+        suspend_request(bs, rule);
+        break;
     }
     return injected;
 }
@@ -433,19 +495,72 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
 static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    struct BlkdebugRule *rule;
+    struct BlkdebugRule *rule, *next;
     bool injected;
 
     assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
 
     injected = false;
     s->new_state = s->state;
-    QLIST_FOREACH(rule, &s->rules[event], next) {
+    QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
         injected = process_rule(bs, rule, injected);
     }
     s->state = s->new_state;
 }
 
+static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
+                                     const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    struct BlkdebugRule *rule;
+    BlkDebugEvent blkdebug_event;
+
+    if (get_event_by_name(event, &blkdebug_event) < 0) {
+        return -ENOENT;
+    }
+
+
+    rule = g_malloc(sizeof(*rule));
+    *rule = (struct BlkdebugRule) {
+        .event  = blkdebug_event,
+        .action = ACTION_SUSPEND,
+        .state  = 0,
+        .options.suspend.tag = g_strdup(tag),
+    };
+
+    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
+
+    return 0;
+}
+
+static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq *r;
+
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+        if (!strcmp(r->tag, tag)) {
+            qemu_coroutine_enter(r->co, NULL);
+            return 0;
+        }
+    }
+    return -ENOENT;
+}
+
+
+static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+    BlkdebugSuspendedReq *r;
+
+    QLIST_FOREACH(r, &s->suspended_reqs, next) {
+        if (!strcmp(r->tag, tag)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static int64_t blkdebug_getlength(BlockDriverState *bs)
 {
     return bdrv_getlength(bs->file);
@@ -464,7 +579,10 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_aio_readv     = blkdebug_aio_readv,
     .bdrv_aio_writev    = blkdebug_aio_writev,
 
-    .bdrv_debug_event   = blkdebug_debug_event,
+    .bdrv_debug_event           = blkdebug_debug_event,
+    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
+    .bdrv_debug_resume          = blkdebug_debug_resume,
+    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
 };
 
 static void bdrv_blkdebug_init(void)
diff --git a/block/commit.c b/block/commit.c
index fae79582d4..e2bb1e241b 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -103,7 +103,7 @@ static void coroutine_fn commit_run(void *opaque)
 
 wait:
         /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that qemu_aio_flush() returns.
+         * with no pending I/O here so that bdrv_drain_all() returns.
          */
         block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         if (block_job_is_cancelled(&s->common)) {
diff --git a/block/mirror.c b/block/mirror.c
index d6618a4b34..b1f5d4fa22 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -205,7 +205,7 @@ static void coroutine_fn mirror_run(void *opaque)
             }
 
             /* Note that even when no rate limit is applied we need to yield
-             * with no pending I/O here so that qemu_aio_flush() returns.
+             * with no pending I/O here so that bdrv_drain_all() returns.
              */
             block_job_sleep_ns(&s->common, rt_clock, delay_ns);
             if (block_job_is_cancelled(&s->common)) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e179211c57..468ef1be56 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -615,57 +615,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     return cluster_offset;
 }
 
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
 {
     BDRVQcowState *s = bs->opaque;
-    int i, j = 0, l2_index, ret;
-    uint64_t *old_cluster, start_sect, *l2_table;
-    uint64_t cluster_offset = m->alloc_offset;
-    bool cow = false;
-
-    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+    int ret;
 
-    if (m->nb_clusters == 0)
+    if (r->nb_sectors == 0) {
         return 0;
+    }
 
-    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+    qemu_co_mutex_unlock(&s->lock);
+    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+                       r->offset / BDRV_SECTOR_SIZE,
+                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+    qemu_co_mutex_lock(&s->lock);
 
-    /* copy content of unmodified sectors */
-    start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
-    if (m->n_start) {
-        cow = true;
-        qemu_co_mutex_unlock(&s->lock);
-        ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0)
-            goto err;
-    }
-
-    if (m->nb_available & (s->cluster_sectors - 1)) {
-        cow = true;
-        qemu_co_mutex_unlock(&s->lock);
-        ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available,
-                           align_offset(m->nb_available, s->cluster_sectors));
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0)
-            goto err;
+    if (ret < 0) {
+        return ret;
     }
 
     /*
-     * Update L2 table.
-     *
      * Before we update the L2 table to actually point to the new cluster, we
      * need to be sure that the refcounts have been increased and COW was
      * handled.
      */
-    if (cow) {
-        qcow2_cache_depends_on_flush(s->l2_table_cache);
+    qcow2_cache_depends_on_flush(s->l2_table_cache);
+
+    return 0;
+}
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i, j = 0, l2_index, ret;
+    uint64_t *old_cluster, *l2_table;
+    uint64_t cluster_offset = m->alloc_offset;
+
+    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+    assert(m->nb_clusters > 0);
+
+    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+
+    /* copy content of unmodified sectors */
+    ret = perform_cow(bs, m, &m->cow_start);
+    if (ret < 0) {
+        goto err;
+    }
+
+    ret = perform_cow(bs, m, &m->cow_end);
+    if (ret < 0) {
+        goto err;
     }
 
+    /* Update L2 table. */
+    if (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS) {
+        qcow2_mark_dirty(bs);
+    }
     if (qcow2_need_accurate_refcounts(s)) {
         qcow2_cache_set_dependency(bs, s->l2_table_cache,
                                    s->refcount_block_cache);
     }
+
     ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
     if (ret < 0) {
         goto err;
@@ -743,38 +753,16 @@ out:
 }
 
 /*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
  */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, unsigned int *nb_clusters)
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+    unsigned int *nb_clusters)
 {
     BDRVQcowState *s = bs->opaque;
     QCowL2Meta *old_alloc;
 
-    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
-                                         *host_offset, *nb_clusters);
-
-    /*
-     * Check if there already is an AIO write request in flight which allocates
-     * the same cluster. In this case we need to wait until the previous
-     * request has completed and updated the L2 table accordingly.
-     */
     QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
 
         uint64_t start = guest_offset >> s->cluster_bits;
@@ -807,6 +795,42 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
         abort();
     }
 
+    return 0;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
+ * contain the number of clusters that have been allocated and are contiguous
+ * in the image file.
+ *
+ * If *host_offset is non-zero, it specifies the offset in the image file at
+ * which the new clusters must start. *nb_clusters can be 0 on return in this
+ * case if the cluster at host_offset is already in use. If *host_offset is
+ * zero, the clusters can be allocated anywhere in the image file.
+ *
+ * *host_offset is updated to contain the offset into the image file at which
+ * the first allocated cluster starts.
+ *
+ * Return 0 on success and -errno in error cases. -EAGAIN means that the
+ * function has been waiting for another request and the allocation must be
+ * restarted, but the whole request should not be failed.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
+
+    ret = handle_dependencies(bs, guest_offset, nb_clusters);
+    if (ret < 0) {
+        return ret;
+    }
+
     /* Allocate new clusters */
     trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
     if (*host_offset == 0) {
@@ -818,7 +842,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
         *host_offset = cluster_offset;
         return 0;
     } else {
-        int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+        ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
         if (ret < 0) {
             return ret;
         }
@@ -847,7 +871,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
  * Return 0 on success and -errno in error cases
  */
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, QCowL2Meta *m)
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
 {
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret, sectors;
@@ -919,12 +943,6 @@ again:
     }
 
     /* If there is something left to allocate, do that now */
-    *m = (QCowL2Meta) {
-        .cluster_offset     = cluster_offset,
-        .nb_clusters        = 0,
-    };
-    qemu_co_queue_init(&m->dependent_requests);
-
     if (nb_clusters > 0) {
         uint64_t alloc_offset;
         uint64_t alloc_cluster_offset;
@@ -957,22 +975,40 @@ again:
              *
              * avail_sectors: Number of sectors from the start of the first
              * newly allocated to the end of the last newly allocated cluster.
+             *
+             * nb_sectors: The number of sectors from the start of the first
+             * newly allocated cluster to the end of the area that the write
+             * request actually writes to (excluding COW at the end)
              */
             int requested_sectors = n_end - keep_clusters * s->cluster_sectors;
             int avail_sectors = nb_clusters
                                 << (s->cluster_bits - BDRV_SECTOR_BITS);
+            int alloc_n_start = keep_clusters == 0 ? n_start : 0;
+            int nb_sectors = MIN(requested_sectors, avail_sectors);
+
+            if (keep_clusters == 0) {
+                cluster_offset = alloc_cluster_offset;
+            }
+
+            *m = g_malloc0(sizeof(**m));
 
-            *m = (QCowL2Meta) {
-                .cluster_offset = keep_clusters == 0 ?
-                                  alloc_cluster_offset : cluster_offset,
+            **m = (QCowL2Meta) {
                 .alloc_offset   = alloc_cluster_offset,
-                .offset         = alloc_offset,
-                .n_start        = keep_clusters == 0 ? n_start : 0,
+                .offset         = alloc_offset & ~(s->cluster_size - 1),
                 .nb_clusters    = nb_clusters,
-                .nb_available   = MIN(requested_sectors, avail_sectors),
+                .nb_available   = nb_sectors,
+
+                .cow_start = {
+                    .offset     = 0,
+                    .nb_sectors = alloc_n_start,
+                },
+                .cow_end = {
+                    .offset     = nb_sectors * BDRV_SECTOR_SIZE,
+                    .nb_sectors = avail_sectors - nb_sectors,
+                },
             };
-            qemu_co_queue_init(&m->dependent_requests);
-            QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
+            qemu_co_queue_init(&(*m)->dependent_requests);
+            QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
         }
     }
 
@@ -984,12 +1020,13 @@ again:
 
     assert(sectors > n_start);
     *num = sectors - n_start;
+    *host_offset = cluster_offset;
 
     return 0;
 
 fail:
-    if (m->nb_clusters > 0) {
-        QLIST_REMOVE(m, next_in_flight);
+    if (*m && (*m)->nb_clusters > 0) {
+        QLIST_REMOVE(*m, next_in_flight);
     }
     return ret;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index c1ff31f482..8520bda21a 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -222,7 +222,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
  * updated successfully.  Therefore it is not required to check the return
  * value of this function.
  */
-static int qcow2_mark_dirty(BlockDriverState *bs)
+int qcow2_mark_dirty(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t val;
@@ -745,21 +745,6 @@ fail:
     return ret;
 }
 
-static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
-{
-    /* Take the request off the list of running requests */
-    if (m->nb_clusters != 0) {
-        QLIST_REMOVE(m, next_in_flight);
-    }
-
-    /* Restart all dependent requests */
-    if (!qemu_co_queue_empty(&m->dependent_requests)) {
-        qemu_co_mutex_unlock(&s->lock);
-        qemu_co_queue_restart_all(&m->dependent_requests);
-        qemu_co_mutex_lock(&s->lock);
-    }
-}
-
 static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                            int64_t sector_num,
                            int remaining_sectors,
@@ -774,15 +759,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
     QEMUIOVector hd_qiov;
     uint64_t bytes_done = 0;
     uint8_t *cluster_data = NULL;
-    QCowL2Meta l2meta = {
-        .nb_clusters = 0,
-    };
+    QCowL2Meta *l2meta;
 
     trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
                                  remaining_sectors);
 
-    qemu_co_queue_init(&l2meta.dependent_requests);
-
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
     s->cluster_cache_offset = -1; /* disable compressed cache */
@@ -791,6 +772,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     while (remaining_sectors != 0) {
 
+        l2meta = NULL;
+
         trace_qcow2_writev_start_part(qemu_coroutine_self());
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
         n_end = index_in_cluster + remaining_sectors;
@@ -800,17 +783,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         }
 
         ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            index_in_cluster, n_end, &cur_nr_sectors, &l2meta);
+            index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
         if (ret < 0) {
             goto fail;
         }
 
-        if (l2meta.nb_clusters > 0 &&
-            (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
-            qcow2_mark_dirty(bs);
-        }
-
-        cluster_offset = l2meta.cluster_offset;
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
@@ -835,8 +812,8 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                 cur_nr_sectors * 512);
         }
 
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         qemu_co_mutex_unlock(&s->lock);
+        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
                                 (cluster_offset >> 9) + index_in_cluster);
         ret = bdrv_co_writev(bs->file,
@@ -847,12 +824,24 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             goto fail;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, &l2meta);
-        if (ret < 0) {
-            goto fail;
-        }
+        if (l2meta != NULL) {
+            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+            if (ret < 0) {
+                goto fail;
+            }
+
+            /* Take the request off the list of running requests */
+            if (l2meta->nb_clusters != 0) {
+                QLIST_REMOVE(l2meta, next_in_flight);
+            }
+
+            qemu_co_mutex_unlock(&s->lock);
+            qemu_co_queue_restart_all(&l2meta->dependent_requests);
+            qemu_co_mutex_lock(&s->lock);
 
-        run_dependent_requests(s, &l2meta);
+            g_free(l2meta);
+            l2meta = NULL;
+        }
 
         remaining_sectors -= cur_nr_sectors;
         sector_num += cur_nr_sectors;
@@ -862,10 +851,16 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
     ret = 0;
 
 fail:
-    run_dependent_requests(s, &l2meta);
-
     qemu_co_mutex_unlock(&s->lock);
 
+    if (l2meta != NULL) {
+        if (l2meta->nb_clusters != 0) {
+            QLIST_REMOVE(l2meta, next_in_flight);
+        }
+        qemu_co_queue_restart_all(&l2meta->dependent_requests);
+        g_free(l2meta);
+    }
+
     qemu_iovec_destroy(&hd_qiov);
     qemu_vfree(cluster_data);
     trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
@@ -1128,31 +1123,33 @@ static int preallocate(BlockDriverState *bs)
 {
     uint64_t nb_sectors;
     uint64_t offset;
+    uint64_t host_offset = 0;
     int num;
     int ret;
-    QCowL2Meta meta;
+    QCowL2Meta *meta;
 
     nb_sectors = bdrv_getlength(bs) >> 9;
     offset = 0;
-    qemu_co_queue_init(&meta.dependent_requests);
-    meta.cluster_offset = 0;
 
     while (nb_sectors) {
         num = MIN(nb_sectors, INT_MAX >> 9);
-        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
+        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+                                         &host_offset, &meta);
         if (ret < 0) {
             return ret;
         }
 
-        ret = qcow2_alloc_cluster_link_l2(bs, &meta);
+        ret = qcow2_alloc_cluster_link_l2(bs, meta);
         if (ret < 0) {
-            qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
+            qcow2_free_any_clusters(bs, meta->alloc_offset, meta->nb_clusters);
             return ret;
         }
 
         /* There are no dependent requests, but we need to remove our request
          * from the list of in-flight requests */
-        run_dependent_requests(bs->opaque, &meta);
+        if (meta != NULL) {
+            QLIST_REMOVE(meta, next_in_flight);
+        }
 
         /* TODO Preallocate data if requested */
 
@@ -1165,10 +1162,10 @@ static int preallocate(BlockDriverState *bs)
      * all of the allocated clusters (otherwise we get failing reads after
      * EOF). Extend the image to the last allocated sector.
      */
-    if (meta.cluster_offset != 0) {
+    if (host_offset != 0) {
         uint8_t buf[512];
         memset(buf, 0, 512);
-        ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1);
+        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
         if (ret < 0) {
             return ret;
         }
diff --git a/block/qcow2.h b/block/qcow2.h
index b4eb65470e..a60fcb429a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -196,17 +196,56 @@ typedef struct QCowCreateState {
 
 struct QCowAIOCB;
 
-/* XXX This could be private for qcow2-cluster.c */
+typedef struct Qcow2COWRegion {
+    /**
+     * Offset of the COW region in bytes from the start of the first cluster
+     * touched by the request.
+     */
+    uint64_t    offset;
+
+    /** Number of sectors to copy */
+    int         nb_sectors;
+} Qcow2COWRegion;
+
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
 typedef struct QCowL2Meta
 {
+    /** Guest offset of the first newly allocated cluster */
     uint64_t offset;
-    uint64_t cluster_offset;
+
+    /** Host offset of the first newly allocated cluster */
     uint64_t alloc_offset;
-    int n_start;
+
+    /**
+     * Number of sectors from the start of the first allocated cluster to
+     * the end of the (possibly shortened) request
+     */
     int nb_available;
+
+    /** Number of newly allocated clusters */
     int nb_clusters;
+
+    /**
+     * Requests that overlap with this allocation and wait to be restarted
+     * when the allocating request has completed.
+     */
     CoQueue dependent_requests;
 
+    /**
+     * The COW Region between the start of the first allocated cluster and the
+     * area the guest actually writes to.
+     */
+    Qcow2COWRegion cow_start;
+
+    /**
+     * The COW Region between the area the guest actually writes to and the
+     * end of the last allocated cluster.
+     */
+    Qcow2COWRegion cow_end;
+
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
 
@@ -264,6 +303,8 @@ static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
 /* qcow2.c functions */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
                   int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
 int qcow2_update_header(BlockDriverState *bs);
 
 /* qcow2-refcount.c functions */
@@ -297,7 +338,7 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int n_start, int n_end, int *num, QCowL2Meta *m);
+    int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                          uint64_t offset,
                                          int compressed_size);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 550c81f22b..abfedbea73 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -708,22 +708,6 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
     return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
-static BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd,
-        unsigned long int req, void *buf,
-        BlockDriverCompletionFunc *cb, void *opaque)
-{
-    RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
-
-    acb->bs = bs;
-    acb->aio_type = QEMU_AIO_IOCTL;
-    acb->aio_fildes = fd;
-    acb->aio_offset = 0;
-    acb->aio_ioctl_buf = buf;
-    acb->aio_ioctl_cmd = req;
-
-    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
-}
-
 static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -1346,10 +1330,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     BDRVRawState *s = bs->opaque;
+    RawPosixAIOData *acb;
 
     if (fd_open(bs) < 0)
         return NULL;
-    return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
+
+    acb = g_slice_new(RawPosixAIOData);
+    acb->bs = bs;
+    acb->aio_type = QEMU_AIO_IOCTL;
+    acb->aio_fildes = s->fd;
+    acb->aio_offset = 0;
+    acb->aio_ioctl_buf = buf;
+    acb->aio_ioctl_cmd = req;
+    return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
 }
 
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 0c05c58c5a..ce207a3109 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -303,13 +303,24 @@ static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
     BDRVRawState *s = bs->opaque;
     LONG low, high;
+    DWORD dwPtrLow;
 
     low = offset;
     high = offset >> 32;
-    if (!SetFilePointer(s->hfile, low, &high, FILE_BEGIN))
-	return -EIO;
-    if (!SetEndOfFile(s->hfile))
+
+    /*
+     * An error has occurred if the return value is INVALID_SET_FILE_POINTER
+     * and GetLastError doesn't return NO_ERROR.
+     */
+    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
+    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
+        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
+        return -EIO;
+    }
+    if (SetEndOfFile(s->hfile) == 0) {
+        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
         return -EIO;
+    }
     return 0;
 }
 
diff --git a/block/rbd.c b/block/rbd.c
index f3becc7a8b..737bab16cc 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -77,6 +77,7 @@ typedef struct RBDAIOCB {
     int error;
     struct BDRVRBDState *s;
     int cancelled;
+    int status;
 } RBDAIOCB;
 
 typedef struct RADOSCB {
@@ -376,12 +377,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     RBDAIOCB *acb = rcb->acb;
     int64_t r;
 
-    if (acb->cancelled) {
-        qemu_vfree(acb->bounce);
-        qemu_aio_release(acb);
-        goto done;
-    }
-
     r = rcb->ret;
 
     if (acb->cmd == RBD_AIO_WRITE ||
@@ -409,7 +404,6 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     /* Note that acb->bh can be NULL in case where the aio was cancelled */
     acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
-done:
     g_free(rcb);
 }
 
@@ -568,6 +562,12 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     RBDAIOCB *acb = (RBDAIOCB *) blockacb;
     acb->cancelled = 1;
+
+    while (acb->status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+
+    qemu_aio_release(acb);
 }
 
 static const AIOCBInfo rbd_aiocb_info = {
@@ -639,8 +639,11 @@ static void rbd_aio_bh_cb(void *opaque)
     acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
+    acb->status = 0;
 
-    qemu_aio_release(acb);
+    if (!acb->cancelled) {
+        qemu_aio_release(acb);
+    }
 }
 
 static int rbd_aio_discard_wrapper(rbd_image_t image,
@@ -685,6 +688,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->s = s;
     acb->cancelled = 0;
     acb->bh = NULL;
+    acb->status = -EINPROGRESS;
 
     if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
diff --git a/block/stream.c b/block/stream.c
index 0c0fc7a13b..0dcd286035 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -108,7 +108,7 @@ static void coroutine_fn stream_run(void *opaque)
 
 wait:
         /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that qemu_aio_flush() returns.
+         * with no pending I/O here so that bdrv_drain_all() returns.
          */
         block_job_sleep_ns(&s->common, rt_clock, delay_ns);
         if (block_job_is_cancelled(&s->common)) {
diff --git a/block/vpc.c b/block/vpc.c
index b6bf52f140..566e9a3b37 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,9 @@
 #include "block_int.h"
 #include "module.h"
 #include "migration.h"
+#if defined(CONFIG_UUID)
+#include <uuid/uuid.h>
+#endif
 
 /**************************************************************/
 
@@ -198,7 +201,8 @@ static int vpc_open(BlockDriverState *bs, int flags)
     bs->total_sectors = (int64_t)
         be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
 
-    if (bs->total_sectors >= 65535 * 16 * 255) {
+    /* Allow a maximum disk size of approximately 2 TB */
+    if (bs->total_sectors >= 65535LL * 255 * 255) {
         err = -EFBIG;
         goto fail;
     }
@@ -524,19 +528,27 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
  * Note that the geometry doesn't always exactly match total_sectors but
  * may round it down.
  *
- * Returns 0 on success, -EFBIG if the size is larger than 127 GB
+ * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
+ * and instead allow up to 255 heads.
  */
 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
     uint8_t* heads, uint8_t* secs_per_cyl)
 {
     uint32_t cyls_times_heads;
 
-    if (total_sectors > 65535 * 16 * 255)
+    /* Allow a maximum disk size of approximately 2 TB */
+    if (total_sectors > 65535LL * 255 * 255) {
         return -EFBIG;
+    }
 
     if (total_sectors > 65535 * 16 * 63) {
         *secs_per_cyl = 255;
-        *heads = 16;
+        if (total_sectors > 65535 * 16 * 255) {
+            *heads = 255;
+        } else {
+            *heads = 16;
+        }
         cyls_times_heads = total_sectors / *secs_per_cyl;
     } else {
         *secs_per_cyl = 17;
@@ -739,7 +751,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
 
     footer->type = be32_to_cpu(disk_type);
 
-    /* TODO uuid is missing */
+#if defined(CONFIG_UUID)
+    uuid_generate(footer->uuid);
+#endif
 
     footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
 
diff --git a/block_int.h b/block_int.h
index 9deedb811a..bf3f79b3db 100644
--- a/block_int.h
+++ b/block_int.h
@@ -190,6 +190,12 @@ struct BlockDriver {
 
     void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
 
+    /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
+    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+        const char *tag);
+    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
     /*
      * Returns 1 if newly created images are guaranteed to contain only
      * zeros, 0 otherwise.
diff --git a/blockdev.c b/blockdev.c
index e73fd6e388..9a05e57009 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -275,7 +275,7 @@ static bool do_check_io_limits(BlockIOLimit *io_limits)
     return true;
 }
 
-DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
+DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
 {
     const char *buf;
     const char *file = NULL;
@@ -325,7 +325,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
             return NULL;
 	}
     } else {
-        type = default_to_scsi ? IF_SCSI : IF_IDE;
+        type = block_default_type;
     }
 
     max_devs = if_max_devs[type];
@@ -568,7 +568,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi)
         break;
     case IF_VIRTIO:
         /* add virtio block device */
-        opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(qemu_find_opts("device"));
         if (arch_type == QEMU_ARCH_S390X) {
             qemu_opt_set(opts, "driver", "virtio-blk-s390");
         } else {
@@ -707,6 +707,7 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
     int ret = 0;
     BlockdevActionList *dev_entry = dev_list;
     BlkTransactionStates *states, *next;
+    Error *local_err = NULL;
 
     QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
     QSIMPLEQ_INIT(&snap_bdrv_states);
@@ -786,12 +787,12 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 
         /* create new image w/backing file */
         if (mode != NEW_IMAGE_MODE_EXISTING) {
-            ret = bdrv_img_create(new_image_file, format,
-                                  states->old_bs->filename,
-                                  states->old_bs->drv->format_name,
-                                  NULL, -1, flags);
-            if (ret) {
-                error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+            bdrv_img_create(new_image_file, format,
+                            states->old_bs->filename,
+                            states->old_bs->drv->format_name,
+                            NULL, -1, flags, &local_err);
+            if (error_is_set(&local_err)) {
+                error_propagate(errp, local_err);
                 goto delete_and_fail;
             }
         }
@@ -1263,8 +1264,8 @@ void qmp_drive_mirror(const char *device, const char *target,
         assert(format && drv);
         bdrv_get_geometry(bs, &size);
         size *= 512;
-        ret = bdrv_img_create(target, format,
-                              NULL, NULL, NULL, size, flags);
+        bdrv_img_create(target, format,
+                        NULL, NULL, NULL, size, flags, &local_err);
     } else {
         switch (mode) {
         case NEW_IMAGE_MODE_EXISTING:
@@ -1272,18 +1273,18 @@ void qmp_drive_mirror(const char *device, const char *target,
             break;
         case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
             /* create new image with backing file */
-            ret = bdrv_img_create(target, format,
-                                  source->filename,
-                                  source->drv->format_name,
-                                  NULL, -1, flags);
+            bdrv_img_create(target, format,
+                            source->filename,
+                            source->drv->format_name,
+                            NULL, -1, flags, &local_err);
             break;
         default:
             abort();
         }
     }
 
-    if (ret) {
-        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
         return;
     }
 
diff --git a/blockdev.h b/blockdev.h
index 5f27b643be..d73d552a98 100644
--- a/blockdev.h
+++ b/blockdev.h
@@ -19,8 +19,13 @@ void blockdev_auto_del(BlockDriverState *bs);
 
 typedef enum {
     IF_DEFAULT = -1,            /* for use with drive_add() only */
+    /*
+     * IF_IDE must be zero, because we want QEMUMachine member
+     * block_default_type to default-initialize to IF_IDE
+     */
+    IF_IDE = 0,
     IF_NONE,
-    IF_IDE, IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
+    IF_SCSI, IF_FLOPPY, IF_PFLASH, IF_MTD, IF_SD, IF_VIRTIO, IF_XEN,
     IF_COUNT
 } BlockInterfaceType;
 
@@ -51,7 +56,7 @@ DriveInfo *drive_get_by_blockdev(BlockDriverState *bs);
 QemuOpts *drive_def(const char *optstr);
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
                     const char *optstr);
-DriveInfo *drive_init(QemuOpts *arg, int default_to_scsi);
+DriveInfo *drive_init(QemuOpts *arg, BlockInterfaceType block_default_type);
 
 /* device-hotplug */
 
diff --git a/configure b/configure
index 38b1cc6b35..ecdb33a58e 100755
--- a/configure
+++ b/configure
@@ -2127,7 +2127,7 @@ fi
 # pixman support probe
 
 if test "$pixman" = ""; then
-  if $pkg_config --atleast-version=0.18.4 pixman-1 > /dev/null 2>&1; then
+  if $pkg_config pixman-1 > /dev/null 2>&1; then
     pixman="system"
   else
     pixman="internal"
@@ -2138,7 +2138,7 @@ if test "$pixman" = "system"; then
   pixman_libs=`$pkg_config --libs pixman-1 2>/dev/null`
 else
   if test ! -d ${source_path}/pixman/pixman; then
-    echo "ERROR: pixman not present (or older than 0.18.4). Your options:"
+    echo "ERROR: pixman not present. Your options:"
     echo "  (1) Preferred: Install the pixman devel package (any recent"
     echo "      distro should have packages as Xorg needs pixman too)."
     echo "  (2) Fetch the pixman submodule, using:"
diff --git a/console.h b/console.h
index 50a0512f32..edb1950871 100644
--- a/console.h
+++ b/console.h
@@ -229,6 +229,16 @@ static inline void unregister_displaychangelistener(DisplayState *ds,
 static inline void dpy_gfx_update(DisplayState *s, int x, int y, int w, int h)
 {
     struct DisplayChangeListener *dcl;
+    int width = pixman_image_get_width(s->surface->image);
+    int height = pixman_image_get_height(s->surface->image);
+
+    x = MAX(x, 0);
+    y = MAX(y, 0);
+    x = MIN(x, width);
+    y = MIN(y, height);
+    w = MIN(w, width - x);
+    h = MIN(h, height - y);
+
     QLIST_FOREACH(dcl, &s->listeners, next) {
         if (dcl->dpy_gfx_update) {
             dcl->dpy_gfx_update(s, x, y, w, h);
diff --git a/exec-all.h b/exec-all.h
index b18d4ca534..e9b07cd986 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -80,8 +80,8 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
 void cpu_gen_init(void);
 int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
                  int *gen_code_size_ptr);
-int cpu_restore_state(struct TranslationBlock *tb,
-                      CPUArchState *env, uintptr_t searched_pc);
+bool cpu_restore_state(CPUArchState *env, uintptr_t searched_pc);
+
 void QEMU_NORETURN cpu_resume_from_signal(CPUArchState *env1, void *puc);
 void QEMU_NORETURN cpu_io_recompile(CPUArchState *env, uintptr_t retaddr);
 TranslationBlock *tb_gen_code(CPUArchState *env, 
@@ -275,8 +275,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }
 
-TranslationBlock *tb_find_pc(uintptr_t pc_ptr);
-
 #include "qemu-lock.h"
 
 extern spinlock_t tb_lock;
diff --git a/exec.c b/exec.c
index 0594b07057..4c1246a9f9 100644
--- a/exec.c
+++ b/exec.c
@@ -1,5 +1,5 @@
 /*
- *  virtual page mapping and translated block handling
+ *  Virtual page mapping
  *
  *  Copyright (c) 2003 Fabrice Bellard
  *
@@ -38,62 +38,20 @@
 #include "exec-memory.h"
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/param.h>
-#if __FreeBSD_version >= 700104
-#define HAVE_KINFO_GETVMMAP
-#define sigqueue sigqueue_freebsd  /* avoid redefinition */
-#include <sys/time.h>
-#include <sys/proc.h>
-#include <machine/profile.h>
-#define _KERNEL
-#include <sys/user.h>
-#undef _KERNEL
-#undef sigqueue
-#include <libutil.h>
-#endif
-#endif
 #else /* !CONFIG_USER_ONLY */
 #include "xen-mapcache.h"
 #include "trace.h"
 #endif
 
 #include "cputlb.h"
+#include "translate-all.h"
 
 #include "memory-internal.h"
 
-//#define DEBUG_TB_INVALIDATE
-//#define DEBUG_FLUSH
 //#define DEBUG_UNASSIGNED
-
-/* make various TB consistency checks */
-//#define DEBUG_TB_CHECK
-
-//#define DEBUG_IOPORT
 //#define DEBUG_SUBPAGE
 
 #if !defined(CONFIG_USER_ONLY)
-/* TB consistency checks only implemented for usermode emulation.  */
-#undef DEBUG_TB_CHECK
-#endif
-
-#define SMC_BITMAP_USE_THRESHOLD 10
-
-static TranslationBlock *tbs;
-static int code_gen_max_blocks;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
-uint8_t *code_gen_prologue;
-static uint8_t *code_gen_buffer;
-static size_t code_gen_buffer_size;
-/* threshold to flush the translated code buffer */
-static size_t code_gen_buffer_max_size;
-static uint8_t *code_gen_ptr;
-
-#if !defined(CONFIG_USER_ONLY)
 int phys_ram_fd;
 static int in_migration;
 
@@ -120,59 +78,6 @@ DEFINE_TLS(CPUArchState *,cpu_single_env);
    2 = Adaptive rate instruction counting.  */
 int use_icount = 0;
 
-typedef struct PageDesc {
-    /* list of TBs intersecting this ram page */
-    TranslationBlock *first_tb;
-    /* in order to optimize self modifying code, we count the number
-       of lookups we do to a given page to use a bitmap */
-    unsigned int code_write_count;
-    uint8_t *code_bitmap;
-#if defined(CONFIG_USER_ONLY)
-    unsigned long flags;
-#endif
-} PageDesc;
-
-/* In system mode we want L1_MAP to be based on ram offsets,
-   while in user mode we want it to be based on virtual addresses.  */
-#if !defined(CONFIG_USER_ONLY)
-#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
-# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
-#else
-# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
-#endif
-#else
-# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
-#endif
-
-/* Size of the L2 (and L3, etc) page tables.  */
-#define L2_BITS 10
-#define L2_SIZE (1 << L2_BITS)
-
-#define P_L2_LEVELS \
-    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
-
-/* The bits remaining after N lower levels of page tables.  */
-#define V_L1_BITS_REM \
-    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
-
-#if V_L1_BITS_REM < 4
-#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
-#else
-#define V_L1_BITS  V_L1_BITS_REM
-#endif
-
-#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
-
-#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
-
-uintptr_t qemu_real_host_page_size;
-uintptr_t qemu_host_page_size;
-uintptr_t qemu_host_page_mask;
-
-/* This is a multi-level map on the virtual address space.
-   The bottom level has pointers to PageDesc.  */
-static void *l1_map[V_L1_SIZE];
-
 #if !defined(CONFIG_USER_ONLY)
 
 static MemoryRegionSection *phys_sections;
@@ -194,179 +99,6 @@ static void *qemu_safe_ram_ptr(ram_addr_t addr);
 
 static MemoryRegion io_mem_watch;
 #endif
-static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
-                         tb_page_addr_t phys_page2);
-
-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
-#ifdef _WIN32
-static inline void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-    
-}
-#else
-static inline void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-    
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-    
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-    
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
-static void page_init(void)
-{
-    /* NOTE: we can always suppose that qemu_host_page_size >=
-       TARGET_PAGE_SIZE */
-#ifdef _WIN32
-    {
-        SYSTEM_INFO system_info;
-
-        GetSystemInfo(&system_info);
-        qemu_real_host_page_size = system_info.dwPageSize;
-    }
-#else
-    qemu_real_host_page_size = getpagesize();
-#endif
-    if (qemu_host_page_size == 0)
-        qemu_host_page_size = qemu_real_host_page_size;
-    if (qemu_host_page_size < TARGET_PAGE_SIZE)
-        qemu_host_page_size = TARGET_PAGE_SIZE;
-    qemu_host_page_mask = ~(qemu_host_page_size - 1);
-
-#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
-    {
-#ifdef HAVE_KINFO_GETVMMAP
-        struct kinfo_vmentry *freep;
-        int i, cnt;
-
-        freep = kinfo_getvmmap(getpid(), &cnt);
-        if (freep) {
-            mmap_lock();
-            for (i = 0; i < cnt; i++) {
-                unsigned long startaddr, endaddr;
-
-                startaddr = freep[i].kve_start;
-                endaddr = freep[i].kve_end;
-                if (h2g_valid(startaddr)) {
-                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
-
-                    if (h2g_valid(endaddr)) {
-                        endaddr = h2g(endaddr);
-                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-                    } else {
-#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
-                        endaddr = ~0ul;
-                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-#endif
-                    }
-                }
-            }
-            free(freep);
-            mmap_unlock();
-        }
-#else
-        FILE *f;
-
-        last_brk = (unsigned long)sbrk(0);
-
-        f = fopen("/compat/linux/proc/self/maps", "r");
-        if (f) {
-            mmap_lock();
-
-            do {
-                unsigned long startaddr, endaddr;
-                int n;
-
-                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
-
-                if (n == 2 && h2g_valid(startaddr)) {
-                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
-
-                    if (h2g_valid(endaddr)) {
-                        endaddr = h2g(endaddr);
-                    } else {
-                        endaddr = ~0ul;
-                    }
-                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
-                }
-            } while (!feof(f));
-
-            fclose(f);
-            mmap_unlock();
-        }
-#endif
-    }
-#endif
-}
-
-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
-{
-    PageDesc *pd;
-    void **lp;
-    int i;
-
-#if defined(CONFIG_USER_ONLY)
-    /* We can't use g_malloc because it may recurse into a locked mutex. */
-# define ALLOC(P, SIZE)                                 \
-    do {                                                \
-        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
-                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
-    } while (0)
-#else
-# define ALLOC(P, SIZE) \
-    do { P = g_malloc0(SIZE); } while (0)
-#endif
-
-    /* Level 1.  Always allocated.  */
-    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
-
-    /* Level 2..N-1.  */
-    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
-        void **p = *lp;
-
-        if (p == NULL) {
-            if (!alloc) {
-                return NULL;
-            }
-            ALLOC(p, sizeof(void *) * L2_SIZE);
-            *lp = p;
-        }
-
-        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
-    }
-
-    pd = *lp;
-    if (pd == NULL) {
-        if (!alloc) {
-            return NULL;
-        }
-        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
-        *lp = pd;
-    }
-
-#undef ALLOC
-
-    return pd + (index & (L2_SIZE - 1));
-}
-
-static inline PageDesc *page_find(tb_page_addr_t index)
-{
-    return page_find_alloc(index, 0);
-}
 
 #if !defined(CONFIG_USER_ONLY)
 
@@ -474,177 +206,8 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
         && mr != &io_mem_notdirty && !mr->rom_device
         && mr != &io_mem_watch;
 }
-
-#define mmap_lock() do { } while(0)
-#define mmap_unlock() do { } while(0)
-#endif
-
-#if defined(CONFIG_USER_ONLY)
-/* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used.  */
-/* ??? 64-bit hosts ought to have no problem mmaping data outside the
-   region in which the guest needs to run.  Revisit this.  */
-#define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-/* ??? Should configure for this, not list operating systems here.  */
-#if (defined(__linux__) \
-    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__))
-# define USE_MMAP
-#endif
-
-/* Minimum size of the code gen buffer.  This number is randomly chosen,
-   but not so small that we can't have a fair number of TB's live.  */
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
-
-/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
-   indicated, this is constrained by the range of direct branches on the
-   host cpu, as used by the TCG implementation of goto_tb.  */
-#if defined(__x86_64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
-#elif defined(__sparc__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
-#elif defined(__s390x__)
-  /* We have a +- 4GB range on the branches; leave some slop.  */
-# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
-#else
-# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
-#endif
-
-#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
-
-#define DEFAULT_CODE_GEN_BUFFER_SIZE \
-  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
-   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
-
-static inline size_t size_code_gen_buffer(size_t tb_size)
-{
-    /* Size the buffer.  */
-    if (tb_size == 0) {
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* ??? Needs adjustments.  */
-        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
-           static buffer, we could size this on RESERVED_VA, on the text
-           segment size of the executable, or continue to use the default.  */
-        tb_size = (unsigned long)(ram_size / 4);
-#endif
-    }
-    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
-    }
-    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
-        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
-    }
-    code_gen_buffer_size = tb_size;
-    return tb_size;
-}
-
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-    __attribute__((aligned(CODE_GEN_ALIGN)));
-
-static inline void *alloc_code_gen_buffer(void)
-{
-    map_exec(static_code_gen_buffer, code_gen_buffer_size);
-    return static_code_gen_buffer;
-}
-#elif defined(USE_MMAP)
-static inline void *alloc_code_gen_buffer(void)
-{
-    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-    uintptr_t start = 0;
-    void *buf;
-
-    /* Constrain the position of the buffer based on the host cpu.
-       Note that these addresses are chosen in concert with the
-       addresses assigned in the relevant linker script file.  */
-# if defined(__PIE__) || defined(__PIC__)
-    /* Don't bother setting a preferred location if we're building
-       a position-independent executable.  We're more likely to get
-       an address near the main executable if we let the kernel
-       choose the address.  */
-# elif defined(__x86_64__) && defined(MAP_32BIT)
-    /* Force the memory down into low memory with the executable.
-       Leave the choice of exact location with the kernel.  */
-    flags |= MAP_32BIT;
-    /* Cannot expect to map more than 800MB in low memory.  */
-    if (code_gen_buffer_size > 800u * 1024 * 1024) {
-        code_gen_buffer_size = 800u * 1024 * 1024;
-    }
-# elif defined(__sparc__)
-    start = 0x40000000ul;
-# elif defined(__s390x__)
-    start = 0x90000000ul;
-# endif
-
-    buf = mmap((void *)start, code_gen_buffer_size,
-               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
-    return buf == MAP_FAILED ? NULL : buf;
-}
-#else
-static inline void *alloc_code_gen_buffer(void)
-{
-    void *buf = g_malloc(code_gen_buffer_size);
-    if (buf) {
-        map_exec(buf, code_gen_buffer_size);
-    }
-    return buf;
-}
-#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
-
-static inline void code_gen_alloc(size_t tb_size)
-{
-    code_gen_buffer_size = size_code_gen_buffer(tb_size);
-    code_gen_buffer = alloc_code_gen_buffer();
-    if (code_gen_buffer == NULL) {
-        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-        exit(1);
-    }
-
-    qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
-
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
-    code_gen_buffer_size -= 1024;
-
-    code_gen_buffer_max_size = code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
-}
-
-/* Must be called before using the QEMU cpus. 'tb_size' is the size
-   (in bytes) allocated to the translation buffer. Zero means default
-   size. */
-void tcg_exec_init(unsigned long tb_size)
-{
-    cpu_gen_init();
-    code_gen_alloc(tb_size);
-    code_gen_ptr = code_gen_buffer;
-    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
-    page_init();
-#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
-    /* There's no guest base to take into account, so go ahead and
-       initialize the prologue now.  */
-    tcg_prologue_init(&tcg_ctx);
-#endif
-}
-
-bool tcg_enabled(void)
-{
-    return code_gen_buffer != NULL;
-}
-
 void cpu_exec_init_all(void)
 {
 #if !defined(CONFIG_USER_ONLY)
@@ -730,763 +293,6 @@ void cpu_exec_init(CPUArchState *env)
 #endif
 }
 
-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
-static TranslationBlock *tb_alloc(target_ulong pc)
-{
-    TranslationBlock *tb;
-
-    if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
-        return NULL;
-    tb = &tbs[nb_tbs++];
-    tb->pc = pc;
-    tb->cflags = 0;
-    return tb;
-}
-
-void tb_free(TranslationBlock *tb)
-{
-    /* In practice this is mostly used for single use temporary TB
-       Ignore the hard cases and just back up if this TB happens to
-       be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
-        code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
-    }
-}
-
-static inline void invalidate_page_bitmap(PageDesc *p)
-{
-    if (p->code_bitmap) {
-        g_free(p->code_bitmap);
-        p->code_bitmap = NULL;
-    }
-    p->code_write_count = 0;
-}
-
-/* Set to NULL all the 'first_tb' fields in all PageDescs. */
-
-static void page_flush_tb_1 (int level, void **lp)
-{
-    int i;
-
-    if (*lp == NULL) {
-        return;
-    }
-    if (level == 0) {
-        PageDesc *pd = *lp;
-        for (i = 0; i < L2_SIZE; ++i) {
-            pd[i].first_tb = NULL;
-            invalidate_page_bitmap(pd + i);
-        }
-    } else {
-        void **pp = *lp;
-        for (i = 0; i < L2_SIZE; ++i) {
-            page_flush_tb_1 (level - 1, pp + i);
-        }
-    }
-}
-
-static void page_flush_tb(void)
-{
-    int i;
-    for (i = 0; i < V_L1_SIZE; i++) {
-        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
-    }
-}
-
-/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUArchState *env1)
-{
-    CPUArchState *env;
-#if defined(DEBUG_FLUSH)
-    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(code_gen_ptr - code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
-#endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
-        cpu_abort(env1, "Internal error: code buffer overflow\n");
-
-    nb_tbs = 0;
-
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
-    }
-
-    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
-    page_flush_tb();
-
-    code_gen_ptr = code_gen_buffer;
-    /* XXX: flush processor icache at this point if cache flush is
-       expensive */
-    tb_flush_count++;
-}
-
-#ifdef DEBUG_TB_CHECK
-
-static void tb_invalidate_check(target_ulong address)
-{
-    TranslationBlock *tb;
-    int i;
-    address &= TARGET_PAGE_MASK;
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
-                  address >= tb->pc + tb->size)) {
-                printf("ERROR invalidate: address=" TARGET_FMT_lx
-                       " PC=%08lx size=%04x\n",
-                       address, (long)tb->pc, tb->size);
-            }
-        }
-    }
-}
-
-/* verify that all the pages have correct rights for code */
-static void tb_page_check(void)
-{
-    TranslationBlock *tb;
-    int i, flags1, flags2;
-
-    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
-            flags1 = page_get_flags(tb->pc);
-            flags2 = page_get_flags(tb->pc + tb->size - 1);
-            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
-                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
-                       (long)tb->pc, tb->size, flags1, flags2);
-            }
-        }
-    }
-}
-
-#endif
-
-/* invalidate one TB */
-static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
-                             int next_offset)
-{
-    TranslationBlock *tb1;
-    for(;;) {
-        tb1 = *ptb;
-        if (tb1 == tb) {
-            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
-            break;
-        }
-        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
-    }
-}
-
-static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
-{
-    TranslationBlock *tb1;
-    unsigned int n1;
-
-    for(;;) {
-        tb1 = *ptb;
-        n1 = (uintptr_t)tb1 & 3;
-        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-        if (tb1 == tb) {
-            *ptb = tb1->page_next[n1];
-            break;
-        }
-        ptb = &tb1->page_next[n1];
-    }
-}
-
-static inline void tb_jmp_remove(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, **ptb;
-    unsigned int n1;
-
-    ptb = &tb->jmp_next[n];
-    tb1 = *ptb;
-    if (tb1) {
-        /* find tb(n) in circular list */
-        for(;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
-                break;
-            if (n1 == 2) {
-                ptb = &tb1->jmp_first;
-            } else {
-                ptb = &tb1->jmp_next[n1];
-            }
-        }
-        /* now we can suppress tb(n) from the list */
-        *ptb = tb->jmp_next[n];
-
-        tb->jmp_next[n] = NULL;
-    }
-}
-
-/* reset the jump entry 'n' of a TB so that it is not chained to
-   another TB */
-static inline void tb_reset_jump(TranslationBlock *tb, int n)
-{
-    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
-}
-
-void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
-{
-    CPUArchState *env;
-    PageDesc *p;
-    unsigned int h, n1;
-    tb_page_addr_t phys_pc;
-    TranslationBlock *tb1, *tb2;
-
-    /* remove the TB from the hash list */
-    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-    h = tb_phys_hash_func(phys_pc);
-    tb_remove(&tb_phys_hash[h], tb,
-              offsetof(TranslationBlock, phys_hash_next));
-
-    /* remove the TB from the page list */
-    if (tb->page_addr[0] != page_addr) {
-        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
-        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
-        invalidate_page_bitmap(p);
-    }
-
-    tb_invalidated_flag = 1;
-
-    /* remove the TB from the hash list */
-    h = tb_jmp_cache_hash_func(tb->pc);
-    for(env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (env->tb_jmp_cache[h] == tb)
-            env->tb_jmp_cache[h] = NULL;
-    }
-
-    /* suppress this TB from the two jump lists */
-    tb_jmp_remove(tb, 0);
-    tb_jmp_remove(tb, 1);
-
-    /* suppress any remaining jumps to this TB */
-    tb1 = tb->jmp_first;
-    for(;;) {
-        n1 = (uintptr_t)tb1 & 3;
-        if (n1 == 2)
-            break;
-        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-        tb2 = tb1->jmp_next[n1];
-        tb_reset_jump(tb1, n1);
-        tb1->jmp_next[n1] = NULL;
-        tb1 = tb2;
-    }
-    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
-
-    tb_phys_invalidate_count++;
-}
-
-static inline void set_bits(uint8_t *tab, int start, int len)
-{
-    int end, mask, end1;
-
-    end = start + len;
-    tab += start >> 3;
-    mask = 0xff << (start & 7);
-    if ((start & ~7) == (end & ~7)) {
-        if (start < end) {
-            mask &= ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    } else {
-        *tab++ |= mask;
-        start = (start + 8) & ~7;
-        end1 = end & ~7;
-        while (start < end1) {
-            *tab++ = 0xff;
-            start += 8;
-        }
-        if (start < end) {
-            mask = ~(0xff << (end & 7));
-            *tab |= mask;
-        }
-    }
-}
-
-static void build_page_bitmap(PageDesc *p)
-{
-    int n, tb_start, tb_end;
-    TranslationBlock *tb;
-
-    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
-
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->pc & ~TARGET_PAGE_MASK;
-            tb_end = tb_start + tb->size;
-            if (tb_end > TARGET_PAGE_SIZE)
-                tb_end = TARGET_PAGE_SIZE;
-        } else {
-            tb_start = 0;
-            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
-        tb = tb->page_next[n];
-    }
-}
-
-TranslationBlock *tb_gen_code(CPUArchState *env,
-                              target_ulong pc, target_ulong cs_base,
-                              int flags, int cflags)
-{
-    TranslationBlock *tb;
-    uint8_t *tc_ptr;
-    tb_page_addr_t phys_pc, phys_page2;
-    target_ulong virt_page2;
-    int code_gen_size;
-
-    phys_pc = get_page_addr_code(env, pc);
-    tb = tb_alloc(pc);
-    if (!tb) {
-        /* flush must be done */
-        tb_flush(env);
-        /* cannot fail at this point */
-        tb = tb_alloc(pc);
-        /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
-    }
-    tc_ptr = code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
-    tb->cs_base = cs_base;
-    tb->flags = flags;
-    tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
-                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
-
-    /* check next page if needed */
-    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
-    phys_page2 = -1;
-    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
-        phys_page2 = get_page_addr_code(env, virt_page2);
-    }
-    tb_link_page(tb, phys_pc, phys_page2);
-    return tb;
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end may refer to *different* physical pages.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- */
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
-                              int is_cpu_write_access)
-{
-    while (start < end) {
-        tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
-        start &= TARGET_PAGE_MASK;
-        start += TARGET_PAGE_SIZE;
-    }
-}
-
-/*
- * Invalidate all TBs which intersect with the target physical address range
- * [start;end[. NOTE: start and end must refer to the *same* physical page.
- * 'is_cpu_write_access' should be true if called from a real cpu write
- * access: the virtual CPU will exit the current TB if code is modified inside
- * this TB.
- */
-void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
-                                   int is_cpu_write_access)
-{
-    TranslationBlock *tb, *tb_next, *saved_tb;
-    CPUArchState *env = cpu_single_env;
-    tb_page_addr_t tb_start, tb_end;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    int current_tb_not_found = is_cpu_write_access;
-    TranslationBlock *current_tb = NULL;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif /* TARGET_HAS_PRECISE_SMC */
-
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    if (!p->code_bitmap &&
-        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
-        is_cpu_write_access) {
-        /* build code bitmap */
-        build_page_bitmap(p);
-    }
-
-    /* we remove all the TBs in the range [start, end[ */
-    /* XXX: see if in some cases it could be faster to invalidate all the code */
-    tb = p->first_tb;
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-        tb_next = tb->page_next[n];
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-            tb_end = tb_start + tb->size;
-        } else {
-            tb_start = tb->page_addr[1];
-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        if (!(tb_end <= start || tb_start >= end)) {
-#ifdef TARGET_HAS_PRECISE_SMC
-            if (current_tb_not_found) {
-                current_tb_not_found = 0;
-                current_tb = NULL;
-                if (env->mem_io_pc) {
-                    /* now we have a real cpu fault */
-                    current_tb = tb_find_pc(env->mem_io_pc);
-                }
-            }
-            if (current_tb == tb &&
-                (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                its execution. We could be more precise by checking
-                that the modification is after the current PC, but it
-                would require a specialized function to partially
-                restore the CPU state */
-
-                current_tb_modified = 1;
-                cpu_restore_state(current_tb, env, env->mem_io_pc);
-                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                     &current_flags);
-            }
-#endif /* TARGET_HAS_PRECISE_SMC */
-            /* we need to do that to handle the case where a signal
-               occurs while doing tb_phys_invalidate() */
-            saved_tb = NULL;
-            if (env) {
-                saved_tb = env->current_tb;
-                env->current_tb = NULL;
-            }
-            tb_phys_invalidate(tb, -1);
-            if (env) {
-                env->current_tb = saved_tb;
-                if (env->interrupt_request && env->current_tb)
-                    cpu_interrupt(env, env->interrupt_request);
-            }
-        }
-        tb = tb_next;
-    }
-#if !defined(CONFIG_USER_ONLY)
-    /* if no code remaining, no need to continue to use slow writes */
-    if (!p->first_tb) {
-        invalidate_page_bitmap(p);
-        if (is_cpu_write_access) {
-            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
-        }
-    }
-#endif
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, NULL);
-    }
-#endif
-}
-
-/* len must be <= 8 and start must be a multiple of len */
-static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
-{
-    PageDesc *p;
-    int offset, b;
-#if 0
-    if (1) {
-        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                  cpu_single_env->mem_io_vaddr, len,
-                  cpu_single_env->eip,
-                  cpu_single_env->eip +
-                  (intptr_t)cpu_single_env->segs[R_CS].base);
-    }
-#endif
-    p = page_find(start >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    if (p->code_bitmap) {
-        offset = start & ~TARGET_PAGE_MASK;
-        b = p->code_bitmap[offset >> 3] >> (offset & 7);
-        if (b & ((1 << len) - 1))
-            goto do_invalidate;
-    } else {
-    do_invalidate:
-        tb_invalidate_phys_page_range(start, start + len, 1);
-    }
-}
-
-#if !defined(CONFIG_SOFTMMU)
-static void tb_invalidate_phys_page(tb_page_addr_t addr,
-                                    uintptr_t pc, void *puc)
-{
-    TranslationBlock *tb;
-    PageDesc *p;
-    int n;
-#ifdef TARGET_HAS_PRECISE_SMC
-    TranslationBlock *current_tb = NULL;
-    CPUArchState *env = cpu_single_env;
-    int current_tb_modified = 0;
-    target_ulong current_pc = 0;
-    target_ulong current_cs_base = 0;
-    int current_flags = 0;
-#endif
-
-    addr &= TARGET_PAGE_MASK;
-    p = page_find(addr >> TARGET_PAGE_BITS);
-    if (!p)
-        return;
-    tb = p->first_tb;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (tb && pc != 0) {
-        current_tb = tb_find_pc(pc);
-    }
-#endif
-    while (tb != NULL) {
-        n = (uintptr_t)tb & 3;
-        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
-#ifdef TARGET_HAS_PRECISE_SMC
-        if (current_tb == tb &&
-            (current_tb->cflags & CF_COUNT_MASK) != 1) {
-                /* If we are modifying the current TB, we must stop
-                   its execution. We could be more precise by checking
-                   that the modification is after the current PC, but it
-                   would require a specialized function to partially
-                   restore the CPU state */
-
-            current_tb_modified = 1;
-            cpu_restore_state(current_tb, env, pc);
-            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
-                                 &current_flags);
-        }
-#endif /* TARGET_HAS_PRECISE_SMC */
-        tb_phys_invalidate(tb, addr);
-        tb = tb->page_next[n];
-    }
-    p->first_tb = NULL;
-#ifdef TARGET_HAS_PRECISE_SMC
-    if (current_tb_modified) {
-        /* we generate a block containing just the instruction
-           modifying the memory. It will ensure that it cannot modify
-           itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, puc);
-    }
-#endif
-}
-#endif
-
-/* add the tb in the target page and protect it if necessary */
-static inline void tb_alloc_page(TranslationBlock *tb,
-                                 unsigned int n, tb_page_addr_t page_addr)
-{
-    PageDesc *p;
-#ifndef CONFIG_USER_ONLY
-    bool page_already_protected;
-#endif
-
-    tb->page_addr[n] = page_addr;
-    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
-    tb->page_next[n] = p->first_tb;
-#ifndef CONFIG_USER_ONLY
-    page_already_protected = p->first_tb != NULL;
-#endif
-    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
-    invalidate_page_bitmap(p);
-
-#if defined(TARGET_HAS_SMC) || 1
-
-#if defined(CONFIG_USER_ONLY)
-    if (p->flags & PAGE_WRITE) {
-        target_ulong addr;
-        PageDesc *p2;
-        int prot;
-
-        /* force the host page as non writable (writes will have a
-           page fault + mprotect overhead) */
-        page_addr &= qemu_host_page_mask;
-        prot = 0;
-        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
-            addr += TARGET_PAGE_SIZE) {
-
-            p2 = page_find (addr >> TARGET_PAGE_BITS);
-            if (!p2)
-                continue;
-            prot |= p2->flags;
-            p2->flags &= ~PAGE_WRITE;
-          }
-        mprotect(g2h(page_addr), qemu_host_page_size,
-                 (prot & PAGE_BITS) & ~PAGE_WRITE);
-#ifdef DEBUG_TB_INVALIDATE
-        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
-               page_addr);
-#endif
-    }
-#else
-    /* if some code is already present, then the pages are already
-       protected. So we handle the case where only the first TB is
-       allocated in a physical page */
-    if (!page_already_protected) {
-        tlb_protect_code(page_addr);
-    }
-#endif
-
-#endif /* TARGET_HAS_SMC */
-}
-
-/* add a new TB and link it to the physical page tables. phys_page2 is
-   (-1) to indicate that only one page contains the TB. */
-static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
-                         tb_page_addr_t phys_page2)
-{
-    unsigned int h;
-    TranslationBlock **ptb;
-
-    /* Grab the mmap lock to stop another thread invalidating this TB
-       before we are done.  */
-    mmap_lock();
-    /* add in the physical hash table */
-    h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
-    tb->phys_hash_next = *ptb;
-    *ptb = tb;
-
-    /* add in the page list */
-    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
-    if (phys_page2 != -1)
-        tb_alloc_page(tb, 1, phys_page2);
-    else
-        tb->page_addr[1] = -1;
-
-    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
-    tb->jmp_next[0] = NULL;
-    tb->jmp_next[1] = NULL;
-
-    /* init original jump addresses */
-    if (tb->tb_next_offset[0] != 0xffff)
-        tb_reset_jump(tb, 0);
-    if (tb->tb_next_offset[1] != 0xffff)
-        tb_reset_jump(tb, 1);
-
-#ifdef DEBUG_TB_CHECK
-    tb_page_check();
-#endif
-    mmap_unlock();
-}
-
-#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-/* check whether the given addr is in TCG generated code buffer or not */
-bool is_tcg_gen_code(uintptr_t tc_ptr)
-{
-    /* This can be called during code generation, code_gen_buffer_max_size
-       is used instead of code_gen_ptr for upper boundary checking */
-    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
-            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
-}
-#endif
-
-/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
-   tb[1].tc_ptr. Return NULL if not found */
-TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
-{
-    int m_min, m_max, m;
-    uintptr_t v;
-    TranslationBlock *tb;
-
-    if (nb_tbs <= 0)
-        return NULL;
-    if (tc_ptr < (uintptr_t)code_gen_buffer ||
-        tc_ptr >= (uintptr_t)code_gen_ptr) {
-        return NULL;
-    }
-    /* binary search (cf Knuth) */
-    m_min = 0;
-    m_max = nb_tbs - 1;
-    while (m_min <= m_max) {
-        m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
-        v = (uintptr_t)tb->tc_ptr;
-        if (v == tc_ptr)
-            return tb;
-        else if (tc_ptr < v) {
-            m_max = m - 1;
-        } else {
-            m_min = m + 1;
-        }
-    }
-    return &tbs[m_max];
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb);
-
-static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, *tb_next, **ptb;
-    unsigned int n1;
-
-    tb1 = tb->jmp_next[n];
-    if (tb1 != NULL) {
-        /* find head of list */
-        for(;;) {
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == 2)
-                break;
-            tb1 = tb1->jmp_next[n1];
-        }
-        /* we are now sure now that tb jumps to tb1 */
-        tb_next = tb1;
-
-        /* remove tb from the jmp_first list */
-        ptb = &tb_next->jmp_first;
-        for(;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb)
-                break;
-            ptb = &tb1->jmp_next[n1];
-        }
-        *ptb = tb->jmp_next[n];
-        tb->jmp_next[n] = NULL;
-
-        /* suppress the jump to next tb in generated code */
-        tb_reset_jump(tb, n);
-
-        /* suppress jumps in the tb on which we could have jumped */
-        tb_reset_jump_recursive(tb_next);
-    }
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb)
-{
-    tb_reset_jump_recursive2(tb, 0);
-    tb_reset_jump_recursive2(tb, 1);
-}
-
 #if defined(TARGET_HAS_ICE)
 #if defined(CONFIG_USER_ONLY)
 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
@@ -1494,21 +300,6 @@ static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
     tb_invalidate_phys_page_range(pc, pc + 1, 0);
 }
 #else
-void tb_invalidate_phys_addr(hwaddr addr)
-{
-    ram_addr_t ram_addr;
-    MemoryRegionSection *section;
-
-    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
-    if (!(memory_region_is_ram(section->mr)
-          || (section->mr->rom_device && section->mr->readable))) {
-        return;
-    }
-    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-        + memory_region_section_addr(section, addr);
-    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
-}
-
 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 {
     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
@@ -1690,67 +481,6 @@ void cpu_single_step(CPUArchState *env, int enabled)
 #endif
 }
 
-static void cpu_unlink_tb(CPUArchState *env)
-{
-    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
-       problem and hope the cpu will stop of its own accord.  For userspace
-       emulation this often isn't actually as bad as it sounds.  Often
-       signals are used primarily to interrupt blocking syscalls.  */
-    TranslationBlock *tb;
-    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
-
-    spin_lock(&interrupt_lock);
-    tb = env->current_tb;
-    /* if the cpu is currently executing code, we must unlink it and
-       all the potentially executing TB */
-    if (tb) {
-        env->current_tb = NULL;
-        tb_reset_jump_recursive(tb);
-    }
-    spin_unlock(&interrupt_lock);
-}
-
-#ifndef CONFIG_USER_ONLY
-/* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUArchState *env, int mask)
-{
-    CPUState *cpu = ENV_GET_CPU(env);
-    int old_mask;
-
-    old_mask = env->interrupt_request;
-    env->interrupt_request |= mask;
-
-    /*
-     * If called from iothread context, wake the target cpu in
-     * case its halted.
-     */
-    if (!qemu_cpu_is_self(cpu)) {
-        qemu_cpu_kick(cpu);
-        return;
-    }
-
-    if (use_icount) {
-        env->icount_decr.u16.high = 0xffff;
-        if (!can_do_io(env)
-            && (mask & ~old_mask) != 0) {
-            cpu_abort(env, "Raised interrupt while not in I/O function");
-        }
-    } else {
-        cpu_unlink_tb(env);
-    }
-}
-
-CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
-
-#else /* CONFIG_USER_ONLY */
-
-void cpu_interrupt(CPUArchState *env, int mask)
-{
-    env->interrupt_request |= mask;
-    cpu_unlink_tb(env);
-}
-#endif /* CONFIG_USER_ONLY */
-
 void cpu_reset_interrupt(CPUArchState *env, int mask)
 {
     env->interrupt_request &= ~mask;
@@ -1829,21 +559,6 @@ CPUArchState *cpu_copy(CPUArchState *env)
 }
 
 #if !defined(CONFIG_USER_ONLY)
-void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
-{
-    unsigned int i;
-
-    /* Discard jump cache entries for any tb which might potentially
-       overlap the flushed page.  */
-    i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset (&env->tb_jmp_cache[i], 0, 
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
-
-    i = tb_jmp_cache_hash_page(addr);
-    memset (&env->tb_jmp_cache[i], 0, 
-            TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
-}
-
 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                       uintptr_t length)
 {
@@ -1933,264 +648,6 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 
     return iotlb;
 }
-
-#else
-/*
- * Walks guest process memory "regions" one by one
- * and calls callback function 'fn' for each region.
- */
-
-struct walk_memory_regions_data
-{
-    walk_memory_regions_fn fn;
-    void *priv;
-    uintptr_t start;
-    int prot;
-};
-
-static int walk_memory_regions_end(struct walk_memory_regions_data *data,
-                                   abi_ulong end, int new_prot)
-{
-    if (data->start != -1ul) {
-        int rc = data->fn(data->priv, data->start, end, data->prot);
-        if (rc != 0) {
-            return rc;
-        }
-    }
-
-    data->start = (new_prot ? end : -1ul);
-    data->prot = new_prot;
-
-    return 0;
-}
-
-static int walk_memory_regions_1(struct walk_memory_regions_data *data,
-                                 abi_ulong base, int level, void **lp)
-{
-    abi_ulong pa;
-    int i, rc;
-
-    if (*lp == NULL) {
-        return walk_memory_regions_end(data, base, 0);
-    }
-
-    if (level == 0) {
-        PageDesc *pd = *lp;
-        for (i = 0; i < L2_SIZE; ++i) {
-            int prot = pd[i].flags;
-
-            pa = base | (i << TARGET_PAGE_BITS);
-            if (prot != data->prot) {
-                rc = walk_memory_regions_end(data, pa, prot);
-                if (rc != 0) {
-                    return rc;
-                }
-            }
-        }
-    } else {
-        void **pp = *lp;
-        for (i = 0; i < L2_SIZE; ++i) {
-            pa = base | ((abi_ulong)i <<
-                (TARGET_PAGE_BITS + L2_BITS * level));
-            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
-            if (rc != 0) {
-                return rc;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
-{
-    struct walk_memory_regions_data data;
-    uintptr_t i;
-
-    data.fn = fn;
-    data.priv = priv;
-    data.start = -1ul;
-    data.prot = 0;
-
-    for (i = 0; i < V_L1_SIZE; i++) {
-        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
-                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
-        if (rc != 0) {
-            return rc;
-        }
-    }
-
-    return walk_memory_regions_end(&data, 0, 0);
-}
-
-static int dump_region(void *priv, abi_ulong start,
-    abi_ulong end, unsigned long prot)
-{
-    FILE *f = (FILE *)priv;
-
-    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
-        " "TARGET_ABI_FMT_lx" %c%c%c\n",
-        start, end, end - start,
-        ((prot & PAGE_READ) ? 'r' : '-'),
-        ((prot & PAGE_WRITE) ? 'w' : '-'),
-        ((prot & PAGE_EXEC) ? 'x' : '-'));
-
-    return (0);
-}
-
-/* dump memory mappings */
-void page_dump(FILE *f)
-{
-    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
-            "start", "end", "size", "prot");
-    walk_memory_regions(f, dump_region);
-}
-
-int page_get_flags(target_ulong address)
-{
-    PageDesc *p;
-
-    p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p)
-        return 0;
-    return p->flags;
-}
-
-/* Modify the flags of a page and invalidate the code if necessary.
-   The flag PAGE_WRITE_ORG is positioned automatically depending
-   on PAGE_WRITE.  The mmap_lock should already be held.  */
-void page_set_flags(target_ulong start, target_ulong end, int flags)
-{
-    target_ulong addr, len;
-
-    /* This function should never be called with addresses outside the
-       guest address space.  If this assert fires, it probably indicates
-       a missing call to h2g_valid.  */
-#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
-#endif
-    assert(start < end);
-
-    start = start & TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
-
-    if (flags & PAGE_WRITE) {
-        flags |= PAGE_WRITE_ORG;
-    }
-
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
-
-        /* If the write protection bit is set, then we invalidate
-           the code inside.  */
-        if (!(p->flags & PAGE_WRITE) &&
-            (flags & PAGE_WRITE) &&
-            p->first_tb) {
-            tb_invalidate_phys_page(addr, 0, NULL);
-        }
-        p->flags = flags;
-    }
-}
-
-int page_check_range(target_ulong start, target_ulong len, int flags)
-{
-    PageDesc *p;
-    target_ulong end;
-    target_ulong addr;
-
-    /* This function should never be called with addresses outside the
-       guest address space.  If this assert fires, it probably indicates
-       a missing call to h2g_valid.  */
-#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
-#endif
-
-    if (len == 0) {
-        return 0;
-    }
-    if (start + len - 1 < start) {
-        /* We've wrapped around.  */
-        return -1;
-    }
-
-    end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
-    start = start & TARGET_PAGE_MASK;
-
-    for (addr = start, len = end - start;
-         len != 0;
-         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        p = page_find(addr >> TARGET_PAGE_BITS);
-        if( !p )
-            return -1;
-        if( !(p->flags & PAGE_VALID) )
-            return -1;
-
-        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
-            return -1;
-        if (flags & PAGE_WRITE) {
-            if (!(p->flags & PAGE_WRITE_ORG))
-                return -1;
-            /* unprotect the page if it was put read-only because it
-               contains translated code */
-            if (!(p->flags & PAGE_WRITE)) {
-                if (!page_unprotect(addr, 0, NULL))
-                    return -1;
-            }
-            return 0;
-        }
-    }
-    return 0;
-}
-
-/* called from signal handler: invalidate the code and unprotect the
-   page. Return TRUE if the fault was successfully handled. */
-int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
-{
-    unsigned int prot;
-    PageDesc *p;
-    target_ulong host_start, host_end, addr;
-
-    /* Technically this isn't safe inside a signal handler.  However we
-       know this only ever happens in a synchronous SEGV handler, so in
-       practice it seems to be ok.  */
-    mmap_lock();
-
-    p = page_find(address >> TARGET_PAGE_BITS);
-    if (!p) {
-        mmap_unlock();
-        return 0;
-    }
-
-    /* if the page was really writable, then we change its
-       protection back to writable */
-    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
-        host_start = address & qemu_host_page_mask;
-        host_end = host_start + qemu_host_page_size;
-
-        prot = 0;
-        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
-            p = page_find(addr >> TARGET_PAGE_BITS);
-            p->flags |= PAGE_WRITE;
-            prot |= p->flags;
-
-            /* and since the content will be modified, we must invalidate
-               the corresponding translated code. */
-            tb_invalidate_phys_page(addr, pc, puc);
-#ifdef DEBUG_TB_CHECK
-            tb_invalidate_check(addr);
-#endif
-        }
-        mprotect((void *)g2h(host_start), qemu_host_page_size,
-                 prot & PAGE_BITS);
-
-        mmap_unlock();
-        return 1;
-    }
-    mmap_unlock();
-    return 0;
-}
 #endif /* defined(CONFIG_USER_ONLY) */
 
 #if !defined(CONFIG_USER_ONLY)
@@ -2954,7 +1411,6 @@ static void check_watchpoint(int offset, int len_mask, int flags)
 {
     CPUArchState *env = cpu_single_env;
     target_ulong pc, cs_base;
-    TranslationBlock *tb;
     target_ulong vaddr;
     CPUWatchpoint *wp;
     int cpu_flags;
@@ -2973,13 +1429,7 @@ static void check_watchpoint(int offset, int len_mask, int flags)
             wp->flags |= BP_WATCHPOINT_HIT;
             if (!env->watchpoint_hit) {
                 env->watchpoint_hit = wp;
-                tb = tb_find_pc(env->mem_io_pc);
-                if (!tb) {
-                    cpu_abort(env, "check_watchpoint: could not find TB for "
-                              "pc=%p", (void *)env->mem_io_pc);
-                }
-                cpu_restore_state(tb, env, env->mem_io_pc);
-                tb_phys_invalidate(tb, -1);
+                tb_check_watchpoint(env);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     env->exception_index = EXCP_DEBUG;
                     cpu_loop_exit(env);
@@ -4090,119 +2540,8 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 }
 #endif
 
-/* in deterministic execution mode, instructions doing device I/Os
-   must be at the end of the TB */
-void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-    uint32_t n, cflags;
-    target_ulong pc, cs_base;
-    uint64_t flags;
-
-    tb = tb_find_pc(retaddr);
-    if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
-                  (void *)retaddr);
-    }
-    n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state(tb, env, retaddr);
-    /* Calculate how many instructions had been executed before the fault
-       occurred.  */
-    n = n - env->icount_decr.u16.low;
-    /* Generate a new TB ending on the I/O insn.  */
-    n++;
-    /* On MIPS and SH, delay slot instructions can only be restarted if
-       they were already the first instruction in the TB.  If this is not
-       the first instruction in a TB then re-execute the preceding
-       branch.  */
-#if defined(TARGET_MIPS)
-    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
-        env->active_tc.PC -= 4;
-        env->icount_decr.u16.low++;
-        env->hflags &= ~MIPS_HFLAG_BMASK;
-    }
-#elif defined(TARGET_SH4)
-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
-            && n > 1) {
-        env->pc -= 2;
-        env->icount_decr.u16.low++;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-    }
-#endif
-    /* This should never happen.  */
-    if (n > CF_COUNT_MASK)
-        cpu_abort(env, "TB too big during recompile");
-
-    cflags = n | CF_LAST_IO;
-    pc = tb->pc;
-    cs_base = tb->cs_base;
-    flags = tb->flags;
-    tb_phys_invalidate(tb, -1);
-    /* FIXME: In theory this could raise an exception.  In practice
-       we have already translated the block once so it's probably ok.  */
-    tb_gen_code(env, pc, cs_base, flags, cflags);
-    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-       the first in the TB) then we end up generating a whole new TB and
-       repeating the fault, which is horribly inefficient.
-       Better would be to execute just this insn uncached, or generate a
-       second new TB.  */
-    cpu_resume_from_signal(env, NULL);
-}
-
 #if !defined(CONFIG_USER_ONLY)
 
-void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
-{
-    int i, target_code_size, max_target_code_size;
-    int direct_jmp_count, direct_jmp2_count, cross_page;
-    TranslationBlock *tb;
-
-    target_code_size = 0;
-    max_target_code_size = 0;
-    cross_page = 0;
-    direct_jmp_count = 0;
-    direct_jmp2_count = 0;
-    for(i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
-        target_code_size += tb->size;
-        if (tb->size > max_target_code_size)
-            max_target_code_size = tb->size;
-        if (tb->page_addr[1] != -1)
-            cross_page++;
-        if (tb->tb_next_offset[0] != 0xffff) {
-            direct_jmp_count++;
-            if (tb->tb_next_offset[1] != 0xffff) {
-                direct_jmp2_count++;
-            }
-        }
-    }
-    /* XXX: avoid using doubles ? */
-    cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%zd\n",
-                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
-    cpu_fprintf(f, "TB count            %d/%d\n", 
-                nb_tbs, code_gen_max_blocks);
-    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
-    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-            cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
-                direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
-                direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
-    cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
-    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
-    tcg_dump_info(f, cpu_fprintf);
-}
-
 /*
  * A helper function for the _utterly broken_ virtio device model to find out if
  * it's running on a big endian machine. Don't do this at home kids!
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 9d33b189e6..bcf278d4ec 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -37,6 +37,8 @@ common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
 common-obj-y += fifo.o
 common-obj-y += pam.o
 
+extra-obj-y += pci/
+
 # PPC devices
 common-obj-$(CONFIG_PREP_PCI) += prep_pci.o
 common-obj-$(CONFIG_I82378) += i82378.o
diff --git a/hw/arm_boot.c b/hw/arm_boot.c
index 92e2cab476..ec3b8d5d12 100644
--- a/hw/arm_boot.c
+++ b/hw/arm_boot.c
@@ -44,11 +44,17 @@ static uint32_t bootloader[] = {
  * for an interprocessor interrupt and polling a configurable
  * location for the kernel secondary CPU entry point.
  */
+#define DSB_INSN 0xf57ff04f
+#define CP15_DSB_INSN 0xee070f9a /* mcr cp15, 0, r0, c7, c10, 4 */
+
 static uint32_t smpboot[] = {
-  0xe59f201c, /* ldr r2, gic_cpu_if */
-  0xe59f001c, /* ldr r0, startaddr */
+  0xe59f2028, /* ldr r2, gic_cpu_if */
+  0xe59f0028, /* ldr r0, startaddr */
   0xe3a01001, /* mov r1, #1 */
-  0xe5821000, /* str r1, [r2] */
+  0xe5821000, /* str r1, [r2] - set GICC_CTLR.Enable */
+  0xe3a010ff, /* mov r1, #0xff */
+  0xe5821004, /* str r1, [r2, #4] - set GICC_PMR.Priority to 0xff */
+  DSB_INSN,   /* dsb */
   0xe320f003, /* wfi */
   0xe5901000, /* ldr     r1, [r0] */
   0xe1110001, /* tst     r1, r1 */
@@ -65,6 +71,11 @@ static void default_write_secondary(ARMCPU *cpu,
     smpboot[ARRAY_SIZE(smpboot) - 1] = info->smp_bootreg_addr;
     smpboot[ARRAY_SIZE(smpboot) - 2] = info->gic_cpu_if_addr;
     for (n = 0; n < ARRAY_SIZE(smpboot); n++) {
+        /* Replace DSB with the pre-v7 DSB if necessary. */
+        if (!arm_feature(&cpu->env, ARM_FEATURE_V7) &&
+            smpboot[n] == DSB_INSN) {
+            smpboot[n] = CP15_DSB_INSN;
+        }
         smpboot[n] = tswap32(smpboot[n]);
     }
     rom_add_blob_fixed("smpboot", smpboot, sizeof(smpboot),
diff --git a/hw/arm_gic.c b/hw/arm_gic.c
index f9e423f152..8d769de4f5 100644
--- a/hw/arm_gic.c
+++ b/hw/arm_gic.c
@@ -73,7 +73,7 @@ void gic_update(GICState *s)
             }
         }
         level = 0;
-        if (best_prio <= s->priority_mask[cpu]) {
+        if (best_prio < s->priority_mask[cpu]) {
             s->current_pending[cpu] = best_irq;
             if (best_prio < s->running_priority[cpu]) {
                 DPRINTF("Raised pending IRQ %d\n", best_irq);
@@ -374,7 +374,8 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
           value = 0xff;
         for (i = 0; i < 8; i++) {
             if (value & (1 << i)) {
-                int mask = (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq);
+                int mask =
+                    (irq < GIC_INTERNAL) ? (1 << cpu) : GIC_TARGET(irq + i);
                 int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
 
                 if (!GIC_TEST_ENABLED(irq + i, cm)) {
@@ -417,7 +418,7 @@ static void gic_dist_writeb(void *opaque, hwaddr offset,
 
         for (i = 0; i < 8; i++) {
             if (value & (1 << i)) {
-                GIC_SET_PENDING(irq + i, GIC_TARGET(irq));
+                GIC_SET_PENDING(irq + i, GIC_TARGET(irq + i));
             }
         }
     } else if (offset < 0x300) {
diff --git a/hw/arm_gic_common.c b/hw/arm_gic_common.c
index 8369309d21..73ae331807 100644
--- a/hw/arm_gic_common.c
+++ b/hw/arm_gic_common.c
@@ -127,7 +127,11 @@ static void arm_gic_common_reset(DeviceState *dev)
     int i;
     memset(s->irq_state, 0, GIC_MAXIRQ * sizeof(gic_irq_state));
     for (i = 0 ; i < s->num_cpu; i++) {
-        s->priority_mask[i] = 0xf0;
+        if (s->revision == REV_11MPCORE) {
+            s->priority_mask[i] = 0xf0;
+        } else {
+            s->priority_mask[i] = 0;
+        }
         s->current_pending[i] = 1023;
         s->running_irq[i] = 1023;
         s->running_priority[i] = 0x100;
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index f0a2e7b5d2..4963678bf1 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -455,9 +455,11 @@ static void armv7m_nvic_reset(DeviceState *dev)
     nc->parent_reset(dev);
     /* Common GIC reset resets to disabled; the NVIC doesn't have
      * per-CPU interfaces so mark our non-existent CPU interface
-     * as enabled by default.
+     * as enabled by default, and with a priority mask which allows
+     * all interrupts through.
      */
     s->gic.cpu_enabled[0] = 1;
+    s->gic.priority_mask[0] = 0x100;
     /* The NVIC as a whole is always enabled. */
     s->gic.enabled = 1;
     systick_reset(s);
diff --git a/hw/boards.h b/hw/boards.h
index 813d0e5109..c66fa16a9d 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -3,6 +3,7 @@
 #ifndef HW_BOARDS_H
 #define HW_BOARDS_H
 
+#include "blockdev.h"
 #include "qdev.h"
 
 typedef struct QEMUMachineInitArgs {
@@ -24,7 +25,7 @@ typedef struct QEMUMachine {
     const char *desc;
     QEMUMachineInitFunc *init;
     QEMUMachineResetFunc *reset;
-    int use_scsi;
+    BlockInterfaceType block_default_type;
     int max_cpus;
     unsigned int no_serial:1,
         no_parallel:1,
diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c
index 6d9c080381..839b9ea1d4 100644
--- a/hw/device-hotplug.c
+++ b/hw/device-hotplug.c
@@ -39,7 +39,7 @@ DriveInfo *add_init_drive(const char *optstr)
     if (!opts)
         return NULL;
 
-    dinfo = drive_init(opts, current_machine->use_scsi);
+    dinfo = drive_init(opts, current_machine->block_default_type);
     if (!dinfo) {
         qemu_opts_del(opts);
         return NULL;
diff --git a/hw/ds1338.c b/hw/ds1338.c
index b576d56438..1aefa3ba04 100644
--- a/hw/ds1338.c
+++ b/hw/ds1338.c
@@ -17,9 +17,16 @@
  */
 #define NVRAM_SIZE 64
 
+/* Flags definitions */
+#define SECONDS_CH 0x80
+#define HOURS_12   0x40
+#define HOURS_PM   0x20
+#define CTRL_OSF   0x20
+
 typedef struct {
     I2CSlave i2c;
     int64_t offset;
+    uint8_t wday_offset;
     uint8_t nvram[NVRAM_SIZE];
     int32_t ptr;
     bool addr_byte;
@@ -27,12 +34,13 @@ typedef struct {
 
 static const VMStateDescription vmstate_ds1338 = {
     .name = "ds1338",
-    .version_id = 1,
+    .version_id = 2,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields = (VMStateField[]) {
         VMSTATE_I2C_SLAVE(i2c, DS1338State),
         VMSTATE_INT64(offset, DS1338State),
+        VMSTATE_UINT8_V(wday_offset, DS1338State, 2),
         VMSTATE_UINT8_ARRAY(nvram, DS1338State, NVRAM_SIZE),
         VMSTATE_INT32(ptr, DS1338State),
         VMSTATE_BOOL(addr_byte, DS1338State),
@@ -49,17 +57,22 @@ static void capture_current_time(DS1338State *s)
     qemu_get_timedate(&now, s->offset);
     s->nvram[0] = to_bcd(now.tm_sec);
     s->nvram[1] = to_bcd(now.tm_min);
-    if (s->nvram[2] & 0x40) {
-        s->nvram[2] = (to_bcd((now.tm_hour % 12)) + 1) | 0x40;
-        if (now.tm_hour >= 12) {
-            s->nvram[2] |= 0x20;
+    if (s->nvram[2] & HOURS_12) {
+        int tmp = now.tm_hour;
+        if (tmp % 12 == 0) { /* midnight -> 12 (AM), noon -> 24 (12 PM) */
+            tmp += 12;
+        }
+        if (tmp <= 12) {
+            s->nvram[2] = HOURS_12 | to_bcd(tmp);
+        } else {
+            s->nvram[2] = HOURS_12 | HOURS_PM | to_bcd(tmp - 12);
         }
     } else {
         s->nvram[2] = to_bcd(now.tm_hour);
     }
-    s->nvram[3] = to_bcd(now.tm_wday) + 1;
+    s->nvram[3] = (now.tm_wday + s->wday_offset) % 7 + 1;
     s->nvram[4] = to_bcd(now.tm_mday);
-    s->nvram[5] = to_bcd(now.tm_mon) + 1;
+    s->nvram[5] = to_bcd(now.tm_mon + 1);
     s->nvram[6] = to_bcd(now.tm_year - 100);
 }
 
@@ -114,7 +127,8 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
         s->addr_byte = false;
         return 0;
     }
-    if (s->ptr < 8) {
+    if (s->ptr < 7) {
+        /* Time register. */
         struct tm now;
         qemu_get_timedate(&now, s->offset);
         switch(s->ptr) {
@@ -126,19 +140,27 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
             now.tm_min = from_bcd(data & 0x7f);
             break;
         case 2:
-            if (data & 0x40) {
-                if (data & 0x20) {
-                    data = from_bcd(data & 0x4f) + 11;
-                } else {
-                    data = from_bcd(data & 0x1f) - 1;
+            if (data & HOURS_12) {
+                /* 12-hour mode: the register holds 1-12 plus an AM/PM
+                   flag; 12 AM is 00:00 and 12 PM is 12:00, so reduce
+                   the hour modulo 12 before adding the PM offset. */
+                int tmp = from_bcd(data & (HOURS_PM - 1)) % 12;
+                if (data & HOURS_PM) {
+                    tmp += 12;
                 }
+                now.tm_hour = tmp;
             } else {
-                data = from_bcd(data);
+                now.tm_hour = from_bcd(data & (HOURS_12 - 1));
             }
-            now.tm_hour = data;
             break;
         case 3:
-            now.tm_wday = from_bcd(data & 7) - 1;
+            {
+                /* The day field is supposed to contain a value in
+                   the range 1-7. Otherwise behavior is undefined.
+                 */
+                int user_wday = (data & 7) - 1;
+                s->wday_offset = (user_wday - now.tm_wday + 7) % 7;
+            }
             break;
         case 4:
             now.tm_mday = from_bcd(data & 0x3f);
@@ -149,11 +171,19 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data)
         case 6:
             now.tm_year = from_bcd(data) + 100;
             break;
-        case 7:
-            /* Control register. Currently ignored.  */
-            break;
         }
         s->offset = qemu_timedate_diff(&now);
+    } else if (s->ptr == 7) {
+        /* Control register. */
+
+        /* Ensure bits 2, 3 and 6 will read back as zero. */
+        data &= 0xB3;
+
+        /* Attempting to write the OSF flag to logic 1 leaves the
+           value unchanged. */
+        data = (data & ~CTRL_OSF) | (data & s->nvram[s->ptr] & CTRL_OSF);
+
+        s->nvram[s->ptr] = data;
     } else {
         s->nvram[s->ptr] = data;
     }
@@ -166,6 +196,18 @@ static int ds1338_init(I2CSlave *i2c)
     return 0;
 }
 
+static void ds1338_reset(DeviceState *dev)
+{
+    DS1338State *s = FROM_I2C_SLAVE(DS1338State, I2C_SLAVE_FROM_QDEV(dev));
+
+    /* The clock is running and synchronized with the host */
+    s->offset = 0;
+    s->wday_offset = 0;
+    memset(s->nvram, 0, NVRAM_SIZE);
+    s->ptr = 0;
+    s->addr_byte = false;
+}
+
 static void ds1338_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
@@ -175,6 +217,7 @@ static void ds1338_class_init(ObjectClass *klass, void *data)
     k->event = ds1338_event;
     k->recv = ds1338_recv;
     k->send = ds1338_send;
+    dc->reset = ds1338_reset;
     dc->vmsd = &vmstate_ds1338;
 }
 
diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index 00d4db8871..22148cd946 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -80,12 +80,16 @@ void exynos4210_write_secondary(ARMCPU *cpu,
 {
     int n;
     uint32_t smpboot[] = {
-        0xe59f3024, /* ldr r3, External gic_cpu_if */
-        0xe59f2024, /* ldr r2, Internal gic_cpu_if */
-        0xe59f0024, /* ldr r0, startaddr */
+        0xe59f3034, /* ldr r3, External gic_cpu_if */
+        0xe59f2034, /* ldr r2, Internal gic_cpu_if */
+        0xe59f0034, /* ldr r0, startaddr */
         0xe3a01001, /* mov r1, #1 */
         0xe5821000, /* str r1, [r2] */
         0xe5831000, /* str r1, [r3] */
+        0xe3a010ff, /* mov r1, #0xff */
+        0xe5821004, /* str r1, [r2, #4] - set internal GICC_PMR to 0xff */
+        0xe5831004, /* str r1, [r3, #4] - set external GICC_PMR to 0xff */
+        0xf57ff04f, /* dsb */
         0xe320f003, /* wfi */
         0xe5901000, /* ldr     r1, [r0] */
         0xe1110001, /* tst     r1, r1 */
diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index e79cd6ac01..37dbda92df 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -568,7 +568,7 @@ static void exynos4210_gfrc_event(void *opaque)
     /* Reload FRC to reach nearest comparator */
     s->g_timer.curr_comp = exynos4210_gcomp_find(s);
     distance = exynos4210_gcomp_get_distance(s, s->g_timer.curr_comp);
-    if (distance > MCT_GT_COUNTER_STEP) {
+    if (distance > MCT_GT_COUNTER_STEP || !distance) {
         distance = MCT_GT_COUNTER_STEP;
     }
     exynos4210_gfrc_set_count(&s->g_timer, distance);
diff --git a/hw/highbank.c b/hw/highbank.c
index afbb005422..8e35127c8a 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -44,9 +44,12 @@ static void hb_write_secondary(ARMCPU *cpu, const struct arm_boot_info *info)
         0xe210000f, /* ands r0, r0, #0x0f */
         0xe3a03040, /* mov r3, #0x40 - jump address is 0x40 + 0x10 * core id */
         0xe0830200, /* add r0, r3, r0, lsl #4 */
-        0xe59f2018, /* ldr r2, privbase */
+        0xe59f2024, /* ldr r2, privbase */
         0xe3a01001, /* mov r1, #1 */
-        0xe5821100, /* str r1, [r2, #256] */
+        0xe5821100, /* str r1, [r2, #256] - set GICC_CTLR.Enable */
+        0xe3a010ff, /* mov r1, #0xff */
+        0xe5821104, /* str r1, [r2, #260] - set GICC_PMR.Priority to 0xff */
+        0xf57ff04f, /* dsb */
         0xe320f003, /* wfi */
         0xe5901000, /* ldr     r1, [r0] */
         0xe1110001, /* tst     r1, r1 */
@@ -326,7 +329,7 @@ static QEMUMachine highbank_machine = {
     .name = "highbank",
     .desc = "Calxeda Highbank (ECX-1000)",
     .init = highbank_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/ide/core.c b/hw/ide/core.c
index adc4aa41b9..0e5bc7fe3b 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1869,6 +1869,8 @@ static void ide_reset(IDEState *s)
     s->io_buffer_index = 0;
     s->cd_sector_size = 0;
     s->atapi_dma = 0;
+    s->tray_locked = 0;
+    s->tray_open = 0;
     /* ATA DMA state */
     s->io_buffer_size = 0;
     s->req_nb_sectors = 0;
diff --git a/hw/kvmvapic.c b/hw/kvmvapic.c
index e04c4011d7..60c8fc46aa 100644
--- a/hw/kvmvapic.c
+++ b/hw/kvmvapic.c
@@ -387,7 +387,6 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i
     VAPICHandlers *handlers;
     uint8_t opcode[2];
     uint32_t imm32;
-    TranslationBlock *current_tb;
     target_ulong current_pc = 0;
     target_ulong current_cs_base = 0;
     int current_flags = 0;
@@ -399,8 +398,7 @@ static void patch_instruction(VAPICROMState *s, CPUX86State *env, target_ulong i
     }
 
     if (!kvm_enabled()) {
-        current_tb = tb_find_pc(env->mem_io_pc);
-        cpu_restore_state(current_tb, env, env->mem_io_pc);
+        cpu_restore_state(env, env->mem_io_pc);
         cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                              &current_flags);
     }
diff --git a/hw/leon3.c b/hw/leon3.c
index 774273828f..ef83dffd85 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -212,7 +212,6 @@ static QEMUMachine leon3_generic_machine = {
     .name     = "leon3_generic",
     .desc     = "Leon-3 generic",
     .init     = leon3_generic_hw_init,
-    .use_scsi = 0,
 };
 
 static void leon3_machine_init(void)
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index 0847427241..ea1416ae2f 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -324,14 +324,14 @@ static QEMUMachine mips_magnum_machine = {
     .name = "magnum",
     .desc = "MIPS Magnum",
     .init = mips_magnum_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine mips_pica61_machine = {
     .name = "pica61",
     .desc = "Acer Pica 61",
     .init = mips_pica61_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void mips_jazz_machine_init(void)
diff --git a/hw/openpic.c b/hw/openpic.c
index 4791dc6eaf..9c956b9dcc 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -37,6 +37,8 @@
 #include "ppc_mac.h"
 #include "pci/pci.h"
 #include "openpic.h"
+#include "sysbus.h"
+#include "pci/msi.h"
 
 //#define DEBUG_OPENPIC
 
@@ -46,89 +48,61 @@
 #define DPRINTF(fmt, ...) do { } while (0)
 #endif
 
-#define USE_MPCxxx /* Intel model is broken, for now */
-
-#if defined (USE_INTEL_GW80314)
-/* Intel GW80314 I/O Companion chip */
-
-#define MAX_CPU     4
-#define MAX_IRQ    32
-#define MAX_DBL     4
-#define MAX_MBX     4
-#define MAX_TMR     4
-#define VECTOR_BITS 8
-#define MAX_IPI     4
-
-#define VID (0x00000000)
-
-#elif defined(USE_MPCxxx)
-
-#define MAX_CPU    15
-#define MAX_IRQ   128
-#define MAX_DBL     0
-#define MAX_MBX     0
+#define MAX_CPU     15
+#define MAX_SRC     256
 #define MAX_TMR     4
 #define VECTOR_BITS 8
 #define MAX_IPI     4
+#define MAX_MSI     8
+#define MAX_IRQ     (MAX_SRC + MAX_IPI + MAX_TMR)
 #define VID         0x03 /* MPIC version ID */
-#define VENI        0x00000000 /* Vendor ID */
-
-enum {
-    IRQ_IPVP = 0,
-    IRQ_IDE,
-};
 
-/* OpenPIC */
-#define OPENPIC_MAX_CPU      2
-#define OPENPIC_MAX_IRQ     64
-#define OPENPIC_EXT_IRQ     48
-#define OPENPIC_MAX_TMR      MAX_TMR
-#define OPENPIC_MAX_IPI      MAX_IPI
+/* OpenPIC capability flags */
+#define OPENPIC_FLAG_IDE_CRIT     (1 << 0)
+
+/* OpenPIC address map */
+#define OPENPIC_GLB_REG_START        0x0
+#define OPENPIC_GLB_REG_SIZE         0x10F0
+#define OPENPIC_TMR_REG_START        0x10F0
+#define OPENPIC_TMR_REG_SIZE         0x220
+#define OPENPIC_MSI_REG_START        0x1600
+#define OPENPIC_MSI_REG_SIZE         0x200
+#define OPENPIC_SRC_REG_START        0x10000
+#define OPENPIC_SRC_REG_SIZE         (MAX_SRC * 0x20)
+#define OPENPIC_CPU_REG_START        0x20000
+#define OPENPIC_CPU_REG_SIZE         (0x100 + ((MAX_CPU - 1) * 0x1000))
+
+/* Raven */
+#define RAVEN_MAX_CPU      2
+#define RAVEN_MAX_EXT     48
+#define RAVEN_MAX_IRQ     64
+#define RAVEN_MAX_TMR      MAX_TMR
+#define RAVEN_MAX_IPI      MAX_IPI
 
 /* Interrupt definitions */
-#define OPENPIC_IRQ_FE     (OPENPIC_EXT_IRQ)     /* Internal functional IRQ */
-#define OPENPIC_IRQ_ERR    (OPENPIC_EXT_IRQ + 1) /* Error IRQ */
-#define OPENPIC_IRQ_TIM0   (OPENPIC_EXT_IRQ + 2) /* First timer IRQ */
-#if OPENPIC_MAX_IPI > 0
-#define OPENPIC_IRQ_IPI0   (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First IPI IRQ */
-#define OPENPIC_IRQ_DBL0   (OPENPIC_IRQ_IPI0 + (OPENPIC_MAX_CPU * OPENPIC_MAX_IPI)) /* First doorbell IRQ */
-#else
-#define OPENPIC_IRQ_DBL0   (OPENPIC_IRQ_TIM0 + OPENPIC_MAX_TMR) /* First doorbell IRQ */
-#define OPENPIC_IRQ_MBX0   (OPENPIC_IRQ_DBL0 + OPENPIC_MAX_DBL) /* First mailbox IRQ */
-#endif
-
-/* MPIC */
-#define MPIC_MAX_CPU      1
-#define MPIC_MAX_EXT     12
-#define MPIC_MAX_INT     64
-#define MPIC_MAX_MSG      4
-#define MPIC_MAX_MSI      8
-#define MPIC_MAX_TMR      MAX_TMR
-#define MPIC_MAX_IPI      MAX_IPI
-#define MPIC_MAX_IRQ     (MPIC_MAX_EXT + MPIC_MAX_INT + MPIC_MAX_TMR + MPIC_MAX_MSG + MPIC_MAX_MSI + (MPIC_MAX_IPI * MPIC_MAX_CPU))
+#define RAVEN_FE_IRQ     (RAVEN_MAX_EXT)     /* Internal functional IRQ */
+#define RAVEN_ERR_IRQ    (RAVEN_MAX_EXT + 1) /* Error IRQ */
+#define RAVEN_TMR_IRQ    (RAVEN_MAX_EXT + 2) /* First timer IRQ */
+#define RAVEN_IPI_IRQ    (RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */
+/* First doorbell IRQ */
+#define RAVEN_DBL_IRQ    (RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI))
+
+/* FSL_MPIC_20 */
+#define FSL_MPIC_20_MAX_CPU      1
+#define FSL_MPIC_20_MAX_EXT     12
+#define FSL_MPIC_20_MAX_INT     64
+#define FSL_MPIC_20_MAX_IRQ     MAX_IRQ
 
 /* Interrupt definitions */
-#define MPIC_EXT_IRQ      0
-#define MPIC_INT_IRQ      (MPIC_EXT_IRQ + MPIC_MAX_EXT)
-#define MPIC_TMR_IRQ      (MPIC_INT_IRQ + MPIC_MAX_INT)
-#define MPIC_MSG_IRQ      (MPIC_TMR_IRQ + MPIC_MAX_TMR)
-#define MPIC_MSI_IRQ      (MPIC_MSG_IRQ + MPIC_MAX_MSG)
-#define MPIC_IPI_IRQ      (MPIC_MSI_IRQ + MPIC_MAX_MSI)
-
-#define MPIC_GLB_REG_START        0x0
-#define MPIC_GLB_REG_SIZE         0x10F0
-#define MPIC_TMR_REG_START        0x10F0
-#define MPIC_TMR_REG_SIZE         0x220
-#define MPIC_EXT_REG_START        0x10000
-#define MPIC_EXT_REG_SIZE         0x180
-#define MPIC_INT_REG_START        0x10200
-#define MPIC_INT_REG_SIZE         0x800
-#define MPIC_MSG_REG_START        0x11600
-#define MPIC_MSG_REG_SIZE         0x100
-#define MPIC_MSI_REG_START        0x11C00
-#define MPIC_MSI_REG_SIZE         0x100
-#define MPIC_CPU_REG_START        0x20000
-#define MPIC_CPU_REG_SIZE         0x100 + ((MAX_CPU - 1) * 0x1000)
+/* IRQs, accessible through the IRQ region */
+#define FSL_MPIC_20_EXT_IRQ      0x00
+#define FSL_MPIC_20_INT_IRQ      0x10
+#define FSL_MPIC_20_MSG_IRQ      0xb0
+#define FSL_MPIC_20_MSI_IRQ      0xe0
+/* These are available through separate regions, but
+   for simplicity's sake mapped into the same number space */
+#define FSL_MPIC_20_TMR_IRQ      0x100
+#define FSL_MPIC_20_IPI_IRQ      0x104
 
 /*
  * Block Revision Register1 (BRR1): QEMU does not fully emulate
@@ -141,34 +115,42 @@ enum {
 #define FSL_BRR1_IPMJ (0x00 << 8) /* 8 bit IP major number */
 #define FSL_BRR1_IPMN 0x00 /* 8 bit IP minor number */
 
-enum mpic_ide_bits {
-    IDR_EP     = 31,
-    IDR_CI0     = 30,
-    IDR_CI1     = 29,
-    IDR_P1     = 1,
-    IDR_P0     = 0,
-};
+#define FREP_NIRQ_SHIFT   16
+#define FREP_NCPU_SHIFT    8
+#define FREP_VID_SHIFT     0
 
-#else
-#error "Please select which OpenPic implementation is to be emulated"
-#endif
+#define VID_REVISION_1_2   2
+#define VID_REVISION_1_3   3
 
-#define OPENPIC_PAGE_SIZE 4096
+#define VENI_GENERIC      0x00000000 /* Generic Vendor ID */
+
+#define IDR_EP_SHIFT      31
+#define IDR_EP_MASK       (1U << IDR_EP_SHIFT)
+#define IDR_CI0_SHIFT     30
+#define IDR_CI1_SHIFT     29
+#define IDR_P1_SHIFT      1
+#define IDR_P0_SHIFT      0
+
+#define MSIIR_OFFSET       0x140
+#define MSIIR_SRS_SHIFT    29
+#define MSIIR_SRS_MASK     (0x7U << MSIIR_SRS_SHIFT)
+#define MSIIR_IBS_SHIFT    24
+#define MSIIR_IBS_MASK     (0x1f << MSIIR_IBS_SHIFT)
 
 #define BF_WIDTH(_bits_) \
 (((_bits_) + (sizeof(uint32_t) * 8) - 1) / (sizeof(uint32_t) * 8))
 
-static inline void set_bit (uint32_t *field, int bit)
+static inline void set_bit(uint32_t *field, int bit)
 {
     field[bit >> 5] |= 1 << (bit & 0x1F);
 }
 
-static inline void reset_bit (uint32_t *field, int bit)
+static inline void reset_bit(uint32_t *field, int bit)
 {
     field[bit >> 5] &= ~(1 << (bit & 0x1F));
 }
 
-static inline int test_bit (uint32_t *field, int bit)
+static inline int test_bit(uint32_t *field, int bit)
 {
     return (field[bit >> 5] & 1 << (bit & 0x1F)) != 0;
 }
@@ -183,41 +165,37 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx);
 
-enum {
-    IRQ_EXTERNAL = 0x01,
-    IRQ_INTERNAL = 0x02,
-    IRQ_TIMER    = 0x04,
-    IRQ_SPECIAL  = 0x08,
-};
-
 typedef struct IRQ_queue_t {
     uint32_t queue[BF_WIDTH(MAX_IRQ)];
     int next;
     int priority;
+    int pending;    /* nr of pending bits in queue */
 } IRQ_queue_t;
 
 typedef struct IRQ_src_t {
     uint32_t ipvp;  /* IRQ vector/priority register */
     uint32_t ide;   /* IRQ destination register */
-    int type;
     int last_cpu;
     int pending;    /* TRUE if IRQ is pending */
 } IRQ_src_t;
 
-enum IPVP_bits {
-    IPVP_MASK     = 31,
-    IPVP_ACTIVITY = 30,
-    IPVP_MODE     = 29,
-    IPVP_POLARITY = 23,
-    IPVP_SENSE    = 22,
-};
+#define IPVP_MASK_SHIFT       31
+#define IPVP_MASK_MASK        (1U << IPVP_MASK_SHIFT)
+#define IPVP_ACTIVITY_SHIFT   30
+#define IPVP_ACTIVITY_MASK    (1U << IPVP_ACTIVITY_SHIFT)
+#define IPVP_MODE_SHIFT       29
+#define IPVP_MODE_MASK        (1U << IPVP_MODE_SHIFT)
+#define IPVP_POLARITY_SHIFT   23
+#define IPVP_POLARITY_MASK    (1U << IPVP_POLARITY_SHIFT)
+#define IPVP_SENSE_SHIFT      22
+#define IPVP_SENSE_MASK       (1U << IPVP_SENSE_SHIFT)
+
 #define IPVP_PRIORITY_MASK     (0x1F << 16)
 #define IPVP_PRIORITY(_ipvpr_) ((int)(((_ipvpr_) & IPVP_PRIORITY_MASK) >> 16))
 #define IPVP_VECTOR_MASK       ((1 << VECTOR_BITS) - 1)
 #define IPVP_VECTOR(_ipvpr_)   ((_ipvpr_) & IPVP_VECTOR_MASK)
 
 typedef struct IRQ_dst_t {
-    uint32_t tfrr;
     uint32_t pctp; /* CPU current task priority */
     uint32_t pcsr; /* CPU sensitivity register */
     IRQ_queue_t raised;
@@ -225,18 +203,28 @@ typedef struct IRQ_dst_t {
     qemu_irq *irqs;
 } IRQ_dst_t;
 
-typedef struct openpic_t {
-    PCIDevice pci_dev;
+typedef struct OpenPICState {
+    SysBusDevice busdev;
     MemoryRegion mem;
 
+    /* Behavior control */
+    uint32_t model;
+    uint32_t flags;
+    uint32_t nb_irqs;
+    uint32_t vid;
+    uint32_t veni; /* Vendor identification register */
+    uint32_t spve_mask;
+    uint32_t tifr_reset;
+    uint32_t ipvp_reset;
+    uint32_t ide_reset;
+    uint32_t brr1;
+
     /* Sub-regions */
-    MemoryRegion sub_io_mem[7];
+    MemoryRegion sub_io_mem[5];
 
     /* Global registers */
     uint32_t frep; /* Feature reporting register */
     uint32_t glbc; /* Global configuration register  */
-    uint32_t micr; /* MPIC interrupt configuration register */
-    uint32_t veni; /* Vendor identification register */
     uint32_t pint; /* Processor initialization register */
     uint32_t spve; /* Spurious vector register */
     uint32_t tifr; /* Timer frequency reporting register */
@@ -244,56 +232,54 @@ typedef struct openpic_t {
     IRQ_src_t src[MAX_IRQ];
     /* Local registers per output pin */
     IRQ_dst_t dst[MAX_CPU];
-    int nb_cpus;
+    uint32_t nb_cpus;
     /* Timer registers */
     struct {
         uint32_t ticc;  /* Global timer current count register */
         uint32_t tibc;  /* Global timer base count register */
     } timers[MAX_TMR];
-#if MAX_DBL > 0
-    /* Doorbell registers */
-    uint32_t dar;        /* Doorbell activate register */
+    /* Shared MSI registers */
     struct {
-        uint32_t dmr;    /* Doorbell messaging register */
-    } doorbells[MAX_DBL];
-#endif
-#if MAX_MBX > 0
-    /* Mailbox registers */
-    struct {
-        uint32_t mbr;    /* Mailbox register */
-    } mailboxes[MAX_MAILBOXES];
-#endif
-    /* IRQ out is used when in bypass mode (not implemented) */
-    qemu_irq irq_out;
-    int max_irq;
-    int irq_ipi0;
-    int irq_tim0;
-    void (*reset) (void *);
-    void (*irq_raise) (struct openpic_t *, int, IRQ_src_t *);
-} openpic_t;
-
-static inline void IRQ_setbit (IRQ_queue_t *q, int n_IRQ)
+        uint32_t msir;   /* Shared Message Signaled Interrupt Register */
+    } msi[MAX_MSI];
+    uint32_t max_irq;
+    uint32_t irq_ipi0;
+    uint32_t irq_tim0;
+    uint32_t irq_msi;
+} OpenPICState;
+
+static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src);
+
+static inline void IRQ_setbit(IRQ_queue_t *q, int n_IRQ)
 {
+    q->pending += !test_bit(q->queue, n_IRQ); /* count each bit only once */
     set_bit(q->queue, n_IRQ);
 }
 
-static inline void IRQ_resetbit (IRQ_queue_t *q, int n_IRQ)
+static inline void IRQ_resetbit(IRQ_queue_t *q, int n_IRQ)
 {
+    q->pending -= test_bit(q->queue, n_IRQ); /* only if the bit was set */
     reset_bit(q->queue, n_IRQ);
 }
 
-static inline int IRQ_testbit (IRQ_queue_t *q, int n_IRQ)
+static inline int IRQ_testbit(IRQ_queue_t *q, int n_IRQ)
 {
     return test_bit(q->queue, n_IRQ);
 }
 
-static void IRQ_check (openpic_t *opp, IRQ_queue_t *q)
+static void IRQ_check(OpenPICState *opp, IRQ_queue_t *q)
 {
     int next, i;
     int priority;
 
     next = -1;
     priority = -1;
+
+    if (!q->pending) {
+        /* IRQ bitmap is empty */
+        goto out;
+    }
+
     for (i = 0; i < opp->max_irq; i++) {
         if (IRQ_testbit(q, i)) {
             DPRINTF("IRQ_check: irq %d set ipvp_pr=%d pr=%d\n",
@@ -304,11 +290,13 @@ static void IRQ_check (openpic_t *opp, IRQ_queue_t *q)
             }
         }
     }
+
+out:
     q->next = next;
     q->priority = priority;
 }
 
-static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q)
+static int IRQ_get_next(OpenPICState *opp, IRQ_queue_t *q)
 {
     if (q->next == -1) {
         /* XXX: optimize */
@@ -318,7 +306,7 @@ static int IRQ_get_next (openpic_t *opp, IRQ_queue_t *q)
     return q->next;
 }
 
-static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
+static void IRQ_local_pipe(OpenPICState *opp, int n_CPU, int n_IRQ)
 {
     IRQ_dst_t *dst;
     IRQ_src_t *src;
@@ -339,7 +327,7 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
                 __func__, n_IRQ, n_CPU);
         return;
     }
-    set_bit(&src->ipvp, IPVP_ACTIVITY);
+    src->ipvp |= IPVP_ACTIVITY_MASK;
     IRQ_setbit(&dst->raised, n_IRQ);
     if (priority < dst->raised.priority) {
         /* An higher priority IRQ is already raised */
@@ -356,11 +344,11 @@ static void IRQ_local_pipe (openpic_t *opp, int n_CPU, int n_IRQ)
         return;
     }
     DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n", n_CPU, n_IRQ);
-    opp->irq_raise(opp, n_CPU, src);
+    openpic_irq_raise(opp, n_CPU, src);
 }
 
 /* update pic state because registers for n_IRQ have changed value */
-static void openpic_update_irq(openpic_t *opp, int n_IRQ)
+static void openpic_update_irq(OpenPICState *opp, int n_IRQ)
 {
     IRQ_src_t *src;
     int i;
@@ -372,7 +360,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
         DPRINTF("%s: IRQ %d is not pending\n", __func__, n_IRQ);
         return;
     }
-    if (test_bit(&src->ipvp, IPVP_MASK)) {
+    if (src->ipvp & IPVP_MASK_MASK) {
         /* Interrupt source is disabled */
         DPRINTF("%s: IRQ %d is disabled\n", __func__, n_IRQ);
         return;
@@ -382,7 +370,7 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
         DPRINTF("%s: IRQ %d has 0 priority\n", __func__, n_IRQ);
         return;
     }
-    if (test_bit(&src->ipvp, IPVP_ACTIVITY)) {
+    if (src->ipvp & IPVP_ACTIVITY_MASK) {
         /* IRQ already active */
         DPRINTF("%s: IRQ %d is already active\n", __func__, n_IRQ);
         return;
@@ -396,18 +384,19 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
     if (src->ide == (1 << src->last_cpu)) {
         /* Only one CPU is allowed to receive this IRQ */
         IRQ_local_pipe(opp, src->last_cpu, n_IRQ);
-    } else if (!test_bit(&src->ipvp, IPVP_MODE)) {
+    } else if (!(src->ipvp & IPVP_MODE_MASK)) {
         /* Directed delivery mode */
         for (i = 0; i < opp->nb_cpus; i++) {
-            if (test_bit(&src->ide, i))
+            if (src->ide & (1 << i)) {
                 IRQ_local_pipe(opp, i, n_IRQ);
+            }
         }
     } else {
         /* Distributed delivery mode */
         for (i = src->last_cpu + 1; i != src->last_cpu; i++) {
             if (i == opp->nb_cpus)
                 i = 0;
-            if (test_bit(&src->ide, i)) {
+            if (src->ide & (1 << i)) {
                 IRQ_local_pipe(opp, i, n_IRQ);
                 src->last_cpu = i;
                 break;
@@ -418,17 +407,18 @@ static void openpic_update_irq(openpic_t *opp, int n_IRQ)
 
 static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
 
     src = &opp->src[n_IRQ];
     DPRINTF("openpic: set irq %d = %d ipvp=%08x\n",
             n_IRQ, level, src->ipvp);
-    if (test_bit(&src->ipvp, IPVP_SENSE)) {
+    if (src->ipvp & IPVP_SENSE_MASK) {
         /* level-sensitive irq */
         src->pending = level;
-        if (!level)
-            reset_bit(&src->ipvp, IPVP_ACTIVITY);
+        if (!level) {
+            src->ipvp &= ~IPVP_ACTIVITY_MASK;
+        }
     } else {
         /* edge-sensitive irq */
         if (level)
@@ -437,24 +427,24 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
     openpic_update_irq(opp, n_IRQ);
 }
 
-static void openpic_reset (void *opaque)
+static void openpic_reset(DeviceState *d)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = FROM_SYSBUS(typeof (*opp), sysbus_from_qdev(d));
     int i;
 
     opp->glbc = 0x80000000;
     /* Initialise controller registers */
-    opp->frep = ((OPENPIC_EXT_IRQ - 1) << 16) | ((MAX_CPU - 1) << 8) | VID;
-    opp->veni = VENI;
+    opp->frep = ((opp->nb_irqs -1) << FREP_NIRQ_SHIFT) |
+                ((opp->nb_cpus -1) << FREP_NCPU_SHIFT) |
+                (opp->vid << FREP_VID_SHIFT);
+
     opp->pint = 0x00000000;
-    opp->spve = 0x000000FF;
-    opp->tifr = 0x003F7A00;
-    /* ? */
-    opp->micr = 0x00000000;
+    opp->spve = -1 & opp->spve_mask;
+    opp->tifr = opp->tifr_reset;
     /* Initialise IRQ sources */
     for (i = 0; i < opp->max_irq; i++) {
-        opp->src[i].ipvp = 0xA0000000;
-        opp->src[i].ide  = 0x00000000;
+        opp->src[i].ipvp = opp->ipvp_reset;
+        opp->src[i].ide  = opp->ide_reset;
     }
     /* Initialise IRQ destinations */
     for (i = 0; i < MAX_CPU; i++) {
@@ -470,34 +460,21 @@ static void openpic_reset (void *opaque)
         opp->timers[i].ticc = 0x00000000;
         opp->timers[i].tibc = 0x80000000;
     }
-    /* Initialise doorbells */
-#if MAX_DBL > 0
-    opp->dar = 0x00000000;
-    for (i = 0; i < MAX_DBL; i++) {
-        opp->doorbells[i].dmr  = 0x00000000;
-    }
-#endif
-    /* Initialise mailboxes */
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MBX; i++) { /* ? */
-        opp->mailboxes[i].mbr   = 0x00000000;
-    }
-#endif
     /* Go out of RESET state */
     opp->glbc = 0x00000000;
 }
 
-static inline uint32_t read_IRQreg_ide(openpic_t *opp, int n_IRQ)
+static inline uint32_t read_IRQreg_ide(OpenPICState *opp, int n_IRQ)
 {
     return opp->src[n_IRQ].ide;
 }
 
-static inline uint32_t read_IRQreg_ipvp(openpic_t *opp, int n_IRQ)
+static inline uint32_t read_IRQreg_ipvp(OpenPICState *opp, int n_IRQ)
 {
     return opp->src[n_IRQ].ipvp;
 }
 
-static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val)
+static inline void write_IRQreg_ide(OpenPICState *opp, int n_IRQ, uint32_t val)
 {
     uint32_t tmp;
 
@@ -507,7 +484,7 @@ static inline void write_IRQreg_ide(openpic_t *opp, int n_IRQ, uint32_t val)
     DPRINTF("Set IDE %d to 0x%08x\n", n_IRQ, opp->src[n_IRQ].ide);
 }
 
-static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
+static inline void write_IRQreg_ipvp(OpenPICState *opp, int n_IRQ, uint32_t val)
 {
     /* NOTE: not fully accurate for special IRQs, but simple and sufficient */
     /* ACTIVITY bit is read-only */
@@ -518,87 +495,10 @@ static inline void write_IRQreg_ipvp(openpic_t *opp, int n_IRQ, uint32_t val)
             opp->src[n_IRQ].ipvp);
 }
 
-#if 0 // Code provision for Intel model
-#if MAX_DBL > 0
-static uint32_t read_doorbell_register (openpic_t *opp,
-                                        int n_dbl, uint32_t offset)
-{
-    uint32_t retval;
-
-    switch (offset) {
-    case DBL_IPVP_OFFSET:
-        retval = read_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl);
-        break;
-    case DBL_IDE_OFFSET:
-        retval = read_IRQreg_ide(opp, IRQ_DBL0 + n_dbl);
-        break;
-    case DBL_DMR_OFFSET:
-        retval = opp->doorbells[n_dbl].dmr;
-        break;
-    }
-
-    return retval;
-}
-
-static void write_doorbell_register (penpic_t *opp, int n_dbl,
-                                     uint32_t offset, uint32_t value)
-{
-    switch (offset) {
-    case DBL_IVPR_OFFSET:
-        write_IRQreg_ipvp(opp, IRQ_DBL0 + n_dbl, value);
-        break;
-    case DBL_IDE_OFFSET:
-        write_IRQreg_ide(opp, IRQ_DBL0 + n_dbl, value);
-        break;
-    case DBL_DMR_OFFSET:
-        opp->doorbells[n_dbl].dmr = value;
-        break;
-    }
-}
-#endif
-
-#if MAX_MBX > 0
-static uint32_t read_mailbox_register (openpic_t *opp,
-                                       int n_mbx, uint32_t offset)
-{
-    uint32_t retval;
-
-    switch (offset) {
-    case MBX_MBR_OFFSET:
-        retval = opp->mailboxes[n_mbx].mbr;
-        break;
-    case MBX_IVPR_OFFSET:
-        retval = read_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx);
-        break;
-    case MBX_DMR_OFFSET:
-        retval = read_IRQreg_ide(opp, IRQ_MBX0 + n_mbx);
-        break;
-    }
-
-    return retval;
-}
-
-static void write_mailbox_register (openpic_t *opp, int n_mbx,
-                                    uint32_t address, uint32_t value)
-{
-    switch (offset) {
-    case MBX_MBR_OFFSET:
-        opp->mailboxes[n_mbx].mbr = value;
-        break;
-    case MBX_IVPR_OFFSET:
-        write_IRQreg_ipvp(opp, IRQ_MBX0 + n_mbx, value);
-        break;
-    case MBX_DMR_OFFSET:
-        write_IRQreg_ide(opp, IRQ_MBX0 + n_mbx, value);
-        break;
-    }
-}
-#endif
-#endif /* 0 : Code provision for Intel model */
-
-static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
+static void openpic_gbl_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_dst_t *dst;
     int idx;
 
@@ -621,9 +521,9 @@ static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
     case 0x1000: /* FREP */
         break;
     case 0x1020: /* GLBC */
-        if (val & 0x80000000 && opp->reset)
-            opp->reset(opp);
-        opp->glbc = val & ~0x80000000;
+        if (val & 0x80000000) {
+            openpic_reset(&opp->busdev.qdev);
+        }
         break;
     case 0x1080: /* VENI */
         break;
@@ -652,19 +552,16 @@ static void openpic_gbl_write (void *opaque, hwaddr addr, uint32_t val)
         }
         break;
     case 0x10E0: /* SPVE */
-        opp->spve = val & 0x000000FF;
-        break;
-    case 0x10F0: /* TIFR */
-        opp->tifr = val;
+        opp->spve = val & opp->spve_mask;
         break;
     default:
         break;
     }
 }
 
-static uint32_t openpic_gbl_read (void *opaque, hwaddr addr)
+static uint64_t openpic_gbl_read(void *opaque, hwaddr addr, unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     uint32_t retval;
 
     DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
@@ -708,9 +605,6 @@ static uint32_t openpic_gbl_read (void *opaque, hwaddr addr)
     case 0x10E0: /* SPVE */
         retval = opp->spve;
         break;
-    case 0x10F0: /* TIFR */
-        retval = opp->tifr;
-        break;
     default:
         break;
     }
@@ -719,73 +613,83 @@ static uint32_t openpic_gbl_read (void *opaque, hwaddr addr)
     return retval;
 }
 
-static void openpic_timer_write (void *opaque, uint32_t addr, uint32_t val)
+static void openpic_tmr_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     int idx;
 
     DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
     if (addr & 0xF)
         return;
-    addr -= 0x10;
-    addr &= 0xFFFF;
-    idx = (addr & 0xFFF0) >> 6;
+    if (addr == 0x0) {
+        /* TIFR (TFRR): offset 0 only, so test before addr is masked */
+        opp->tifr = val;
+        return;
+    }
+    idx = (addr >> 6) & 0x3;
+
     addr = addr & 0x30;
-    switch (addr) {
-    case 0x00: /* TICC */
+    switch (addr & 0x30) {
+    case 0x00: /* TICC (GTCCR) */
         break;
-    case 0x10: /* TIBC */
+    case 0x10: /* TIBC (GTBCR) */
         if ((opp->timers[idx].ticc & 0x80000000) != 0 &&
             (val & 0x80000000) == 0 &&
             (opp->timers[idx].tibc & 0x80000000) != 0)
             opp->timers[idx].ticc &= ~0x80000000;
         opp->timers[idx].tibc = val;
         break;
-    case 0x20: /* TIVP */
+    case 0x20: /* TIVP (GTIVPR) */
         write_IRQreg_ipvp(opp, opp->irq_tim0 + idx, val);
         break;
-    case 0x30: /* TIDE */
+    case 0x30: /* TIDE (GTIDR) */
         write_IRQreg_ide(opp, opp->irq_tim0 + idx, val);
         break;
     }
 }
 
-static uint32_t openpic_timer_read (void *opaque, uint32_t addr)
+static uint64_t openpic_tmr_read(void *opaque, hwaddr addr, unsigned len)
 {
-    openpic_t *opp = opaque;
-    uint32_t retval;
+    OpenPICState *opp = opaque;
+    uint32_t retval = -1;
     int idx;
 
     DPRINTF("%s: addr %08x\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-    addr -= 0x10;
-    addr &= 0xFFFF;
-    idx = (addr & 0xFFF0) >> 6;
-    addr = addr & 0x30;
-    switch (addr) {
-    case 0x00: /* TICC */
+    if (addr & 0xF) {
+        goto out;
+    }
+    idx = (addr >> 6) & 0x3;
+    if (addr == 0x0) {
+        /* TIFR (TFRR) */
+        retval = opp->tifr;
+        goto out;
+    }
+    switch (addr & 0x30) {
+    case 0x00: /* TICC (GTCCR) */
         retval = opp->timers[idx].ticc;
         break;
-    case 0x10: /* TIBC */
+    case 0x10: /* TIBC (GTBCR) */
         retval = opp->timers[idx].tibc;
         break;
-    case 0x20: /* TIPV */
+    case 0x20: /* TIVP (GTIVPR) */
         retval = read_IRQreg_ipvp(opp, opp->irq_tim0 + idx);
         break;
-    case 0x30: /* TIDE */
+    case 0x30: /* TIDE (GTIDR) */
         retval = read_IRQreg_ide(opp, opp->irq_tim0 + idx);
         break;
     }
+
+out:
     DPRINTF("%s: => %08x\n", __func__, retval);
 
     return retval;
 }
 
-static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
+static void openpic_src_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     int idx;
 
     DPRINTF("%s: addr %08x <= %08x\n", __func__, addr, val);
@@ -802,9 +706,9 @@ static void openpic_src_write (void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t openpic_src_read (void *opaque, uint32_t addr)
+static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     uint32_t retval;
     int idx;
 
@@ -826,10 +730,72 @@ static uint32_t openpic_src_read (void *opaque, uint32_t addr)
     return retval;
 }
 
+static void openpic_msi_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned size)
+{
+    OpenPICState *opp = opaque;
+    int idx = opp->irq_msi;
+    int srs, ibs;
+
+    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
+    if (addr & 0xF) {
+        return;
+    }
+
+    switch (addr) {
+    case MSIIR_OFFSET:
+        srs = (val >> MSIIR_SRS_SHIFT) & 0x7; /* 3-bit field, bounds msi[] */
+        idx += srs;
+        ibs = (val & MSIIR_IBS_MASK) >> MSIIR_IBS_SHIFT;
+        opp->msi[srs].msir |= 1U << ibs; /* unsigned: ibs may be 31 */
+        openpic_set_irq(opp, idx, 1);
+        break;
+    default:
+        /* most registers are read-only, thus ignored */
+        break;
+    }
+}
+
+static uint64_t openpic_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+    OpenPICState *opp = opaque;
+    uint64_t r = 0;
+    int i, srs;
+
+    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
+    if (addr & 0xF) {
+        return -1;
+    }
+
+    srs = addr >> 4;
+
+    switch (addr) {
+    case 0x00:
+    case 0x10:
+    case 0x20:
+    case 0x30:
+    case 0x40:
+    case 0x50:
+    case 0x60:
+    case 0x70: /* MSIRs */
+        r = opp->msi[srs].msir;
+        /* Clear on read */
+        opp->msi[srs].msir = 0;
+        break;
+    case 0x120: /* MSISR */
+        for (i = 0; i < MAX_MSI; i++) {
+            r |= (opp->msi[i].msir ? 1 : 0) << i;
+        }
+        break;
+    }
+
+    return r;
+}
+
 static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
                                        uint32_t val, int idx)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
     int s_IRQ, n_IRQ;
@@ -841,7 +807,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
     dst = &opp->dst[idx];
     addr &= 0xFF0;
     switch (addr) {
-#if MAX_IPI > 0
     case 0x40: /* IPIDR */
     case 0x50:
     case 0x60:
@@ -853,7 +818,6 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 1);
         openpic_set_irq(opp, opp->irq_ipi0 + idx, 0);
         break;
-#endif
     case 0x80: /* PCTP */
         dst->pctp = val & 0x0000000F;
         break;
@@ -878,7 +842,7 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
              IPVP_PRIORITY(src->ipvp) > dst->servicing.priority)) {
             DPRINTF("Raise OpenPIC INT output cpu %d irq %d\n",
                     idx, n_IRQ);
-            opp->irq_raise(opp, idx, src);
+            openpic_irq_raise(opp, idx, src);
         }
         break;
     default:
@@ -886,7 +850,8 @@ static void openpic_cpu_write_internal(void *opaque, hwaddr addr,
     }
 }
 
-static void openpic_cpu_write(void *opaque, hwaddr addr, uint32_t val)
+static void openpic_cpu_write(void *opaque, hwaddr addr, uint64_t val,
+                              unsigned len)
 {
     openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12);
 }
@@ -894,7 +859,7 @@ static void openpic_cpu_write(void *opaque, hwaddr addr, uint32_t val)
 static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
                                           int idx)
 {
-    openpic_t *opp = opaque;
+    OpenPICState *opp = opaque;
     IRQ_src_t *src;
     IRQ_dst_t *dst;
     uint32_t retval;
@@ -908,7 +873,7 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
     addr &= 0xFF0;
     switch (addr) {
     case 0x00: /* Block Revision Register1 (BRR1) */
-        retval = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN;
+        retval = opp->brr1;
         break;
     case 0x80: /* PCTP */
         retval = dst->pctp;
@@ -926,13 +891,13 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
             retval = IPVP_VECTOR(opp->spve);
         } else {
             src = &opp->src[n_IRQ];
-            if (!test_bit(&src->ipvp, IPVP_ACTIVITY) ||
+            if (!(src->ipvp & IPVP_ACTIVITY_MASK) ||
                 !(IPVP_PRIORITY(src->ipvp) > dst->pctp)) {
                 /* - Spurious level-sensitive IRQ
                  * - Priorities has been changed
                  *   and the pending IRQ isn't allowed anymore
                  */
-                reset_bit(&src->ipvp, IPVP_ACTIVITY);
+                src->ipvp &= ~IPVP_ACTIVITY_MASK;
                 retval = IPVP_VECTOR(opp->spve);
             } else {
                 /* IRQ enter servicing state */
@@ -941,20 +906,20 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
             }
             IRQ_resetbit(&dst->raised, n_IRQ);
             dst->raised.next = -1;
-            if (!test_bit(&src->ipvp, IPVP_SENSE)) {
+            if (!(src->ipvp & IPVP_SENSE_MASK)) {
                 /* edge-sensitive IRQ */
-                reset_bit(&src->ipvp, IPVP_ACTIVITY);
+                src->ipvp &= ~IPVP_ACTIVITY_MASK;
                 src->pending = 0;
             }
 
             if ((n_IRQ >= opp->irq_ipi0) &&  (n_IRQ < (opp->irq_ipi0 + MAX_IPI))) {
                 src->ide &= ~(1 << idx);
-                if (src->ide && !test_bit(&src->ipvp, IPVP_SENSE)) {
+                if (src->ide && !(src->ipvp & IPVP_SENSE_MASK)) {
                     /* trigger on CPUs that didn't know about it yet */
                     openpic_set_irq(opp, n_IRQ, 1);
                     openpic_set_irq(opp, n_IRQ, 0);
                     /* if all CPUs knew about it, set active bit again */
-                    set_bit(&src->ipvp, IPVP_ACTIVITY);
+                    src->ipvp |= IPVP_ACTIVITY_MASK;
                 }
             }
         }
@@ -970,96 +935,109 @@ static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr,
     return retval;
 }
 
-static uint32_t openpic_cpu_read(void *opaque, hwaddr addr)
+static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len)
 {
     return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
 }
 
-static void openpic_buggy_write (void *opaque,
-                                 hwaddr addr, uint32_t val)
-{
-    printf("Invalid OPENPIC write access !\n");
-}
-
-static uint32_t openpic_buggy_read (void *opaque, hwaddr addr)
-{
-    printf("Invalid OPENPIC read access !\n");
-
-    return -1;
-}
-
-static void openpic_writel (void *opaque,
-                            hwaddr addr, uint32_t val)
-{
-    openpic_t *opp = opaque;
-
-    addr &= 0x3FFFF;
-    DPRINTF("%s: offset %08x val: %08x\n", __func__, (int)addr, val);
-    if (addr < 0x1100) {
-        /* Global registers */
-        openpic_gbl_write(opp, addr, val);
-    } else if (addr < 0x10000) {
-        /* Timers registers */
-        openpic_timer_write(opp, addr, val);
-    } else if (addr < 0x20000) {
-        /* Source registers */
-        openpic_src_write(opp, addr, val);
-    } else {
-        /* CPU registers */
-        openpic_cpu_write(opp, addr, val);
-    }
-}
+static const MemoryRegionOps openpic_glb_ops_le = {
+    .write = openpic_gbl_write,
+    .read  = openpic_gbl_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-static uint32_t openpic_readl (void *opaque,hwaddr addr)
-{
-    openpic_t *opp = opaque;
-    uint32_t retval;
+static const MemoryRegionOps openpic_glb_ops_be = {
+    .write = openpic_gbl_write,
+    .read  = openpic_gbl_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    addr &= 0x3FFFF;
-    DPRINTF("%s: offset %08x\n", __func__, (int)addr);
-    if (addr < 0x1100) {
-        /* Global registers */
-        retval = openpic_gbl_read(opp, addr);
-    } else if (addr < 0x10000) {
-        /* Timers registers */
-        retval = openpic_timer_read(opp, addr);
-    } else if (addr < 0x20000) {
-        /* Source registers */
-        retval = openpic_src_read(opp, addr);
-    } else {
-        /* CPU registers */
-        retval = openpic_cpu_read(opp, addr);
-    }
+static const MemoryRegionOps openpic_tmr_ops_le = {
+    .write = openpic_tmr_write,
+    .read  = openpic_tmr_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    return retval;
-}
+static const MemoryRegionOps openpic_tmr_ops_be = {
+    .write = openpic_tmr_write,
+    .read  = openpic_tmr_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-static uint64_t openpic_read(void *opaque, hwaddr addr,
-                             unsigned size)
-{
-    openpic_t *opp = opaque;
+static const MemoryRegionOps openpic_cpu_ops_le = {
+    .write = openpic_cpu_write,
+    .read  = openpic_cpu_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    switch (size) {
-    case 4: return openpic_readl(opp, addr);
-    default: return openpic_buggy_read(opp, addr);
-    }
-}
+static const MemoryRegionOps openpic_cpu_ops_be = {
+    .write = openpic_cpu_write,
+    .read  = openpic_cpu_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-static void openpic_write(void *opaque, hwaddr addr,
-                          uint64_t data, unsigned size)
-{
-    openpic_t *opp = opaque;
+static const MemoryRegionOps openpic_src_ops_le = {
+    .write = openpic_src_write,
+    .read  = openpic_src_read,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-    switch (size) {
-    case 4: return openpic_writel(opp, addr, data);
-    default: return openpic_buggy_write(opp, addr, data);
-    }
-}
+static const MemoryRegionOps openpic_src_ops_be = {
+    .write = openpic_src_write,
+    .read  = openpic_src_read,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
 
-static const MemoryRegionOps openpic_ops = {
-    .read = openpic_read,
-    .write = openpic_write,
+static const MemoryRegionOps openpic_msi_ops_le = {
+    .read = openpic_msi_read,
+    .write = openpic_msi_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps openpic_msi_ops_be = {
+    .read = openpic_msi_read,
+    .write = openpic_msi_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
 };
 
 static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
@@ -1075,12 +1053,10 @@ static void openpic_save_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 
 static void openpic_save(QEMUFile* f, void *opaque)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = (OpenPICState *)opaque;
     unsigned int i;
 
-    qemu_put_be32s(f, &opp->frep);
     qemu_put_be32s(f, &opp->glbc);
-    qemu_put_be32s(f, &opp->micr);
     qemu_put_be32s(f, &opp->veni);
     qemu_put_be32s(f, &opp->pint);
     qemu_put_be32s(f, &opp->spve);
@@ -1089,15 +1065,13 @@ static void openpic_save(QEMUFile* f, void *opaque)
     for (i = 0; i < opp->max_irq; i++) {
         qemu_put_be32s(f, &opp->src[i].ipvp);
         qemu_put_be32s(f, &opp->src[i].ide);
-        qemu_put_sbe32s(f, &opp->src[i].type);
         qemu_put_sbe32s(f, &opp->src[i].last_cpu);
         qemu_put_sbe32s(f, &opp->src[i].pending);
     }
 
-    qemu_put_sbe32s(f, &opp->nb_cpus);
+    qemu_put_be32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
-        qemu_put_be32s(f, &opp->dst[i].tfrr);
         qemu_put_be32s(f, &opp->dst[i].pctp);
         qemu_put_be32s(f, &opp->dst[i].pcsr);
         openpic_save_IRQ_queue(f, &opp->dst[i].raised);
@@ -1108,22 +1082,6 @@ static void openpic_save(QEMUFile* f, void *opaque)
         qemu_put_be32s(f, &opp->timers[i].ticc);
         qemu_put_be32s(f, &opp->timers[i].tibc);
     }
-
-#if MAX_DBL > 0
-    qemu_put_be32s(f, &opp->dar);
-
-    for (i = 0; i < MAX_DBL; i++) {
-        qemu_put_be32s(f, &opp->doorbells[i].dmr);
-    }
-#endif
-
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MAILBOXES; i++) {
-        qemu_put_be32s(f, &opp->mailboxes[i].mbr);
-    }
-#endif
-
-    pci_device_save(&opp->pci_dev, f);
 }
 
 static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
@@ -1139,15 +1097,13 @@ static void openpic_load_IRQ_queue(QEMUFile* f, IRQ_queue_t *q)
 
 static int openpic_load(QEMUFile* f, void *opaque, int version_id)
 {
-    openpic_t *opp = (openpic_t *)opaque;
+    OpenPICState *opp = (OpenPICState *)opaque;
     unsigned int i;
 
     if (version_id != 1)
         return -EINVAL;
 
-    qemu_get_be32s(f, &opp->frep);
     qemu_get_be32s(f, &opp->glbc);
-    qemu_get_be32s(f, &opp->micr);
     qemu_get_be32s(f, &opp->veni);
     qemu_get_be32s(f, &opp->pint);
     qemu_get_be32s(f, &opp->spve);
@@ -1156,15 +1112,13 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
     for (i = 0; i < opp->max_irq; i++) {
         qemu_get_be32s(f, &opp->src[i].ipvp);
         qemu_get_be32s(f, &opp->src[i].ide);
-        qemu_get_sbe32s(f, &opp->src[i].type);
         qemu_get_sbe32s(f, &opp->src[i].last_cpu);
         qemu_get_sbe32s(f, &opp->src[i].pending);
     }
 
-    qemu_get_sbe32s(f, &opp->nb_cpus);
+    qemu_get_be32s(f, &opp->nb_cpus);
 
     for (i = 0; i < opp->nb_cpus; i++) {
-        qemu_get_be32s(f, &opp->dst[i].tfrr);
         qemu_get_be32s(f, &opp->dst[i].pctp);
         qemu_get_be32s(f, &opp->dst[i].pcsr);
         openpic_load_IRQ_queue(f, &opp->dst[i].raised);
@@ -1176,535 +1130,156 @@ static int openpic_load(QEMUFile* f, void *opaque, int version_id)
         qemu_get_be32s(f, &opp->timers[i].tibc);
     }
 
-#if MAX_DBL > 0
-    qemu_get_be32s(f, &opp->dar);
-
-    for (i = 0; i < MAX_DBL; i++) {
-        qemu_get_be32s(f, &opp->doorbells[i].dmr);
-    }
-#endif
-
-#if MAX_MBX > 0
-    for (i = 0; i < MAX_MAILBOXES; i++) {
-        qemu_get_be32s(f, &opp->mailboxes[i].mbr);
-    }
-#endif
-
-    return pci_device_load(&opp->pci_dev, f);
-}
-
-static void openpic_irq_raise(openpic_t *opp, int n_CPU, IRQ_src_t *src)
-{
-    qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
-}
-
-qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs, qemu_irq irq_out)
-{
-    openpic_t *opp;
-    int i, m;
-
-    /* XXX: for now, only one CPU is supported */
-    if (nb_cpus != 1)
-        return NULL;
-    opp = g_malloc0(sizeof(openpic_t));
-    memory_region_init_io(&opp->mem, &openpic_ops, opp, "openpic", 0x40000);
-
-    //    isu_base &= 0xFFFC0000;
-    opp->nb_cpus = nb_cpus;
-    opp->max_irq = OPENPIC_MAX_IRQ;
-    opp->irq_ipi0 = OPENPIC_IRQ_IPI0;
-    opp->irq_tim0 = OPENPIC_IRQ_TIM0;
-    /* Set IRQ types */
-    for (i = 0; i < OPENPIC_EXT_IRQ; i++) {
-        opp->src[i].type = IRQ_EXTERNAL;
-    }
-    for (; i < OPENPIC_IRQ_TIM0; i++) {
-        opp->src[i].type = IRQ_SPECIAL;
-    }
-#if MAX_IPI > 0
-    m = OPENPIC_IRQ_IPI0;
-#else
-    m = OPENPIC_IRQ_DBL0;
-#endif
-    for (; i < m; i++) {
-        opp->src[i].type = IRQ_TIMER;
-    }
-    for (; i < OPENPIC_MAX_IRQ; i++) {
-        opp->src[i].type = IRQ_INTERNAL;
-    }
-    for (i = 0; i < nb_cpus; i++)
-        opp->dst[i].irqs = irqs[i];
-    opp->irq_out = irq_out;
-
-    register_savevm(&opp->pci_dev.qdev, "openpic", 0, 2,
-                    openpic_save, openpic_load, opp);
-    qemu_register_reset(openpic_reset, opp);
-
-    opp->irq_raise = openpic_irq_raise;
-    opp->reset = openpic_reset;
-
-    if (pmem)
-        *pmem = &opp->mem;
-
-    return qemu_allocate_irqs(openpic_set_irq, opp, opp->max_irq);
-}
-
-static void mpic_irq_raise(openpic_t *mpp, int n_CPU, IRQ_src_t *src)
-{
-    int n_ci = IDR_CI0 - n_CPU;
-
-    if(test_bit(&src->ide, n_ci)) {
-        qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]);
-    }
-    else {
-        qemu_irq_raise(mpp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
-    }
+    return 0;
 }
 
-static void mpic_reset (void *opaque)
+static void openpic_irq_raise(OpenPICState *opp, int n_CPU, IRQ_src_t *src)
 {
-    openpic_t *mpp = (openpic_t *)opaque;
-    int i;
+    int n_ci = IDR_CI0_SHIFT - n_CPU;
 
-    mpp->glbc = 0x80000000;
-    /* Initialise controller registers */
-    mpp->frep = 0x004f0002 | ((mpp->nb_cpus - 1) << 8);
-    mpp->veni = VENI;
-    mpp->pint = 0x00000000;
-    mpp->spve = 0x0000FFFF;
-    /* Initialise IRQ sources */
-    for (i = 0; i < mpp->max_irq; i++) {
-        mpp->src[i].ipvp = 0x80800000;
-        mpp->src[i].ide  = 0x00000001;
-    }
-    /* Set IDE for IPIs to 0 so we don't get spurious interrupts */
-    for (i = mpp->irq_ipi0; i < (mpp->irq_ipi0 + MAX_IPI); i++) {
-        mpp->src[i].ide = 0;
-    }
-    /* Initialise IRQ destinations */
-    for (i = 0; i < MAX_CPU; i++) {
-        mpp->dst[i].pctp      = 0x0000000F;
-        mpp->dst[i].tfrr      = 0x00000000;
-        memset(&mpp->dst[i].raised, 0, sizeof(IRQ_queue_t));
-        mpp->dst[i].raised.next = -1;
-        memset(&mpp->dst[i].servicing, 0, sizeof(IRQ_queue_t));
-        mpp->dst[i].servicing.next = -1;
-    }
-    /* Initialise timers */
-    for (i = 0; i < MAX_TMR; i++) {
-        mpp->timers[i].ticc = 0x00000000;
-        mpp->timers[i].tibc = 0x80000000;
+    if ((opp->flags & OPENPIC_FLAG_IDE_CRIT) && (src->ide & (1 << n_ci))) {
+        qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_CINT]);
+    } else {
+        qemu_irq_raise(opp->dst[n_CPU].irqs[OPENPIC_OUTPUT_INT]);
     }
-    /* Go out of RESET state */
-    mpp->glbc = 0x00000000;
 }
 
-static void mpic_timer_write (void *opaque, hwaddr addr, uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx, cpu;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-    addr &= 0xFFFF;
-    cpu = addr >> 12;
-    idx = (addr >> 6) & 0x3;
-    switch (addr & 0x30) {
-    case 0x00: /* gtccr */
-        break;
-    case 0x10: /* gtbcr */
-        if ((mpp->timers[idx].ticc & 0x80000000) != 0 &&
-            (val & 0x80000000) == 0 &&
-            (mpp->timers[idx].tibc & 0x80000000) != 0)
-            mpp->timers[idx].ticc &= ~0x80000000;
-        mpp->timers[idx].tibc = val;
-        break;
-    case 0x20: /* GTIVPR */
-        write_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx, val);
-        break;
-    case 0x30: /* GTIDR & TFRR */
-        if ((addr & 0xF0) == 0xF0)
-            mpp->dst[cpu].tfrr = val;
-        else
-            write_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx, val);
-        break;
-    }
-}
+struct memreg {
+    const char             *name;
+    MemoryRegionOps const  *ops;
+    bool                   map;
+    hwaddr      start_addr;
+    ram_addr_t              size;
+};
 
-static uint32_t mpic_timer_read (void *opaque, hwaddr addr)
+static int openpic_init(SysBusDevice *dev)
 {
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx, cpu;
+    OpenPICState *opp = FROM_SYSBUS(typeof (*opp), dev);
+    int i, j;
+    struct memreg list_le[] = {
+        {"glb", &openpic_glb_ops_le, true,
+                OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_le, true,
+                OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
+        {"msi", &openpic_msi_ops_le, true,
+                OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE},
+        {"src", &openpic_src_ops_le, true,
+                OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_le, true,
+                OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
+    };
+    struct memreg list_be[] = {
+        {"glb", &openpic_glb_ops_be, true,
+                OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
+        {"tmr", &openpic_tmr_ops_be, true,
+                OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
+        {"msi", &openpic_msi_ops_be, true,
+                OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE},
+        {"src", &openpic_src_ops_be, true,
+                OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
+        {"cpu", &openpic_cpu_ops_be, true,
+                OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
+    };
+    struct memreg *list;
 
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-    addr &= 0xFFFF;
-    cpu = addr >> 12;
-    idx = (addr >> 6) & 0x3;
-    switch (addr & 0x30) {
-    case 0x00: /* gtccr */
-        retval = mpp->timers[idx].ticc;
-        break;
-    case 0x10: /* gtbcr */
-        retval = mpp->timers[idx].tibc;
-        break;
-    case 0x20: /* TIPV */
-        retval = read_IRQreg_ipvp(mpp, MPIC_TMR_IRQ + idx);
-        break;
-    case 0x30: /* TIDR */
-        if ((addr &0xF0) == 0XF0)
-            retval = mpp->dst[cpu].tfrr;
-        else
-            retval = read_IRQreg_ide(mpp, MPIC_TMR_IRQ + idx);
+    switch (opp->model) {
+    case OPENPIC_MODEL_FSL_MPIC_20:
+    default:
+        opp->flags |= OPENPIC_FLAG_IDE_CRIT;
+        opp->nb_irqs = 80;
+        opp->vid = VID_REVISION_1_2;
+        opp->veni = VENI_GENERIC;
+        opp->spve_mask = 0xFFFF;
+        opp->tifr_reset = 0x00000000;
+        opp->ipvp_reset = 0x80000000;
+        opp->ide_reset = 0x00000001;
+        opp->max_irq = FSL_MPIC_20_MAX_IRQ;
+        opp->irq_ipi0 = FSL_MPIC_20_IPI_IRQ;
+        opp->irq_tim0 = FSL_MPIC_20_TMR_IRQ;
+        opp->irq_msi = FSL_MPIC_20_MSI_IRQ;
+        opp->brr1 = FSL_BRR1_IPID | FSL_BRR1_IPMJ | FSL_BRR1_IPMN;
+        msi_supported = true;
+        list = list_be;
         break;
-    }
-    DPRINTF("%s: => %08x\n", __func__, retval);
-
-    return retval;
-}
-
-static void mpic_src_ext_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_EXT_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_EXT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
+    case OPENPIC_MODEL_RAVEN:
+        opp->nb_irqs = RAVEN_MAX_EXT;
+        opp->vid = VID_REVISION_1_3;
+        opp->veni = VENI_GENERIC;
+        opp->spve_mask = 0xFF;
+        opp->tifr_reset = 0x003F7A00;
+        opp->ipvp_reset = 0xA0000000;
+        opp->ide_reset = 0x00000000;
+        opp->max_irq = RAVEN_MAX_IRQ;
+        opp->irq_ipi0 = RAVEN_IPI_IRQ;
+        opp->irq_tim0 = RAVEN_TMR_IRQ;
+        opp->brr1 = -1;
+        list = list_le;
+        /* Don't map MSI region */
+        list[2].map = false;
+
+        /* Only UP supported today */
+        if (opp->nb_cpus != 1) {
+            return -EINVAL;
         }
+        break;
     }
-}
 
-static uint32_t mpic_src_ext_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_EXT_IRQ;
+    memory_region_init(&opp->mem, "openpic", 0x40000);
 
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-
-    if (addr < MPIC_EXT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
+    for (i = 0; i < ARRAY_SIZE(list_le); i++) {
+        if (!list[i].map) {
+            continue;
         }
-        DPRINTF("%s: => %08x\n", __func__, retval);
-    }
-
-    return retval;
-}
-
-static void mpic_src_int_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_INT_IRQ;
 
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_INT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-
-static uint32_t mpic_src_int_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_INT_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
+        memory_region_init_io(&opp->sub_io_mem[i], list[i].ops, opp,
+                              list[i].name, list[i].size);
 
-    if (addr < MPIC_INT_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
+        memory_region_add_subregion(&opp->mem, list[i].start_addr,
+                                    &opp->sub_io_mem[i]);
     }
 
-    return retval;
-}
-
-static void mpic_src_msg_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_MSG_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
-
-    if (addr < MPIC_MSG_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
+    for (i = 0; i < opp->nb_cpus; i++) {
+        opp->dst[i].irqs = g_new(qemu_irq, OPENPIC_OUTPUT_NB);
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_init_irq(dev, &opp->dst[i].irqs[j]);
         }
     }
-}
 
-static uint32_t mpic_src_msg_read (void *opaque, hwaddr addr)
-{
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_MSG_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
+    register_savevm(&opp->busdev.qdev, "openpic", 0, 2,
+                    openpic_save, openpic_load, opp);
 
-    if (addr < MPIC_MSG_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
-    }
+    sysbus_init_mmio(dev, &opp->mem);
+    qdev_init_gpio_in(&dev->qdev, openpic_set_irq, opp->max_irq);
 
-    return retval;
+    return 0;
 }
 
-static void mpic_src_msi_write (void *opaque, hwaddr addr,
-                                uint32_t val)
-{
-    openpic_t *mpp = opaque;
-    int idx = MPIC_MSI_IRQ;
-
-    DPRINTF("%s: addr " TARGET_FMT_plx " <= %08x\n", __func__, addr, val);
-    if (addr & 0xF)
-        return;
+static Property openpic_properties[] = {
+    DEFINE_PROP_UINT32("model", OpenPICState, model, OPENPIC_MODEL_FSL_MPIC_20),
+    DEFINE_PROP_UINT32("nb_cpus", OpenPICState, nb_cpus, 1),
+    DEFINE_PROP_END_OF_LIST(),
+};
 
-    if (addr < MPIC_MSI_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            write_IRQreg_ide(mpp, idx, val);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            write_IRQreg_ipvp(mpp, idx, val);
-        }
-    }
-}
-static uint32_t mpic_src_msi_read (void *opaque, hwaddr addr)
+static void openpic_class_init(ObjectClass *klass, void *data)
 {
-    openpic_t *mpp = opaque;
-    uint32_t retval;
-    int idx = MPIC_MSI_IRQ;
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-    DPRINTF("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
-    retval = 0xFFFFFFFF;
-    if (addr & 0xF)
-        return retval;
-
-    if (addr < MPIC_MSI_REG_SIZE) {
-        idx += (addr & 0xFFF0) >> 5;
-        if (addr & 0x10) {
-            /* EXDE / IFEDE / IEEDE */
-            retval = read_IRQreg_ide(mpp, idx);
-        } else {
-            /* EXVP / IFEVP / IEEVP */
-            retval = read_IRQreg_ipvp(mpp, idx);
-        }
-        DPRINTF("%s: => %08x\n", __func__, retval);
-    }
-
-    return retval;
+    k->init = openpic_init;
+    dc->props = openpic_properties;
+    dc->reset = openpic_reset;
 }
 
-static const MemoryRegionOps mpic_glb_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   openpic_gbl_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   openpic_gbl_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
+static TypeInfo openpic_info = {
+    .name          = "openpic",
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(OpenPICState),
+    .class_init    = openpic_class_init,
 };
 
-static const MemoryRegionOps mpic_tmr_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_timer_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_timer_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_cpu_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   openpic_cpu_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   openpic_cpu_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_ext_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_ext_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_ext_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_int_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_int_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_int_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_msg_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_msg_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_msg_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-static const MemoryRegionOps mpic_msi_ops = {
-    .old_mmio = {
-        .write = { openpic_buggy_write,
-                   openpic_buggy_write,
-                   mpic_src_msi_write,
-        },
-        .read  = { openpic_buggy_read,
-                   openpic_buggy_read,
-                   mpic_src_msi_read,
-        },
-    },
-    .endianness = DEVICE_BIG_ENDIAN,
-};
-
-qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs, qemu_irq irq_out)
+static void openpic_register_types(void)
 {
-    openpic_t    *mpp;
-    int           i;
-    struct {
-        const char             *name;
-        MemoryRegionOps const  *ops;
-        hwaddr      start_addr;
-        ram_addr_t              size;
-    } const list[] = {
-        {"glb", &mpic_glb_ops, MPIC_GLB_REG_START, MPIC_GLB_REG_SIZE},
-        {"tmr", &mpic_tmr_ops, MPIC_TMR_REG_START, MPIC_TMR_REG_SIZE},
-        {"ext", &mpic_ext_ops, MPIC_EXT_REG_START, MPIC_EXT_REG_SIZE},
-        {"int", &mpic_int_ops, MPIC_INT_REG_START, MPIC_INT_REG_SIZE},
-        {"msg", &mpic_msg_ops, MPIC_MSG_REG_START, MPIC_MSG_REG_SIZE},
-        {"msi", &mpic_msi_ops, MPIC_MSI_REG_START, MPIC_MSI_REG_SIZE},
-        {"cpu", &mpic_cpu_ops, MPIC_CPU_REG_START, MPIC_CPU_REG_SIZE},
-    };
-
-    mpp = g_malloc0(sizeof(openpic_t));
-
-    memory_region_init(&mpp->mem, "mpic", 0x40000);
-    memory_region_add_subregion(address_space, base, &mpp->mem);
-
-    for (i = 0; i < sizeof(list)/sizeof(list[0]); i++) {
-
-        memory_region_init_io(&mpp->sub_io_mem[i], list[i].ops, mpp,
-                              list[i].name, list[i].size);
-
-        memory_region_add_subregion(&mpp->mem, list[i].start_addr,
-                                    &mpp->sub_io_mem[i]);
-    }
-
-    mpp->nb_cpus = nb_cpus;
-    mpp->max_irq = MPIC_MAX_IRQ;
-    mpp->irq_ipi0 = MPIC_IPI_IRQ;
-    mpp->irq_tim0 = MPIC_TMR_IRQ;
-
-    for (i = 0; i < nb_cpus; i++)
-        mpp->dst[i].irqs = irqs[i];
-    mpp->irq_out = irq_out;
-
-    mpp->irq_raise = mpic_irq_raise;
-    mpp->reset = mpic_reset;
-
-    register_savevm(NULL, "mpic", 0, 2, openpic_save, openpic_load, mpp);
-    qemu_register_reset(mpic_reset, mpp);
-
-    return qemu_allocate_irqs(openpic_set_irq, mpp, mpp->max_irq);
+    type_register_static(&openpic_info);
 }
+
+type_init(openpic_register_types)
diff --git a/hw/openpic.h b/hw/openpic.h
index f50a1e42bd..e226d7b563 100644
--- a/hw/openpic.h
+++ b/hw/openpic.h
@@ -11,8 +11,7 @@ enum {
     OPENPIC_OUTPUT_NB,
 };
 
-qemu_irq *openpic_init (MemoryRegion **pmem, int nb_cpus,
-                        qemu_irq **irqs, qemu_irq irq_out);
-qemu_irq *mpic_init (MemoryRegion *address_space, hwaddr base,
-                     int nb_cpus, qemu_irq **irqs, qemu_irq irq_out);
+#define OPENPIC_MODEL_RAVEN       0
+#define OPENPIC_MODEL_FSL_MPIC_20 1
+
 #endif /* __OPENPIC_H__ */
diff --git a/hw/pc_sysfw.c b/hw/pc_sysfw.c
index 40bced2322..d7ea3a5595 100644
--- a/hw/pc_sysfw.c
+++ b/hw/pc_sysfw.c
@@ -98,7 +98,7 @@ static void pc_fw_add_pflash_drv(void)
       return;
     }
 
-    if (!drive_init(opts, machine->use_scsi)) {
+    if (!drive_init(opts, machine->block_default_type)) {
         qemu_opts_del(opts);
     }
 }
diff --git a/hw/pci/Makefile.objs b/hw/pci/Makefile.objs
index aa7a0e84b5..fe965fe2f6 100644
--- a/hw/pci/Makefile.objs
+++ b/hw/pci/Makefile.objs
@@ -5,3 +5,5 @@ common-obj-$(CONFIG_PCI) += slotid_cap.o
 common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
 common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
 common-obj-$(CONFIG_NO_PCI) += pci-stub.o
+
+extra-obj-y += pci-stub.o
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 917327bfe6..a6a401e286 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -180,8 +180,7 @@ static void msix_table_mmio_write(void *opaque, hwaddr addr,
 static const MemoryRegionOps msix_table_mmio_ops = {
     .read = msix_table_mmio_read,
     .write = msix_table_mmio_write,
-    /* TODO: MSIX should be LITTLE_ENDIAN. */
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
@@ -198,8 +197,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr,
 
 static const MemoryRegionOps msix_pba_mmio_ops = {
     .read = msix_pba_mmio_read,
-    /* TODO: MSIX should be LITTLE_ENDIAN. */
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
         .max_access_size = 4,
diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index cb7cf8fba5..afdcc0e531 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -11,7 +11,7 @@ obj-y += ppc_newworld.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o pci/pci-hotplug.o spapr_iommu.o
-obj-$(CONFIG_PSERIES) += spapr_events.o
+obj-$(CONFIG_PSERIES) += spapr_events.o spapr_nvram.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-y += ppc440_bamboo.o
diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h
new file mode 100644
index 0000000000..f20f51bcd2
--- /dev/null
+++ b/hw/ppc/e500-ccsr.h
@@ -0,0 +1,17 @@
+#ifndef E500_CCSR_H
+#define E500_CCSR_H
+
+#include "../sysbus.h"
+
+typedef struct PPCE500CCSRState {
+    /*< private >*/
+    SysBusDevice parent;
+    /*< public >*/
+
+    MemoryRegion ccsr_space;
+} PPCE500CCSRState;
+
+#define TYPE_CCSR "e500-ccsr"
+#define CCSR(obj) OBJECT_CHECK(PPCE500CCSRState, (obj), TYPE_CCSR)
+
+#endif /* E500_CCSR_H */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index f77c488af7..8fab508c07 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -17,6 +17,7 @@
 #include "config.h"
 #include "qemu-common.h"
 #include "e500.h"
+#include "e500-ccsr.h"
 #include "net.h"
 #include "hw/hw.h"
 #include "hw/serial.h"
@@ -33,6 +34,7 @@
 #include "hw/sysbus.h"
 #include "exec-memory.h"
 #include "host-utils.h"
+#include "hw/ppce500_pci.h"
 
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define UIMAGE_LOAD_BASE           0
@@ -46,13 +48,16 @@
 /* TODO: parameterize */
 #define MPC8544_CCSRBAR_BASE       0xE0000000ULL
 #define MPC8544_CCSRBAR_SIZE       0x00100000ULL
-#define MPC8544_MPIC_REGS_BASE     (MPC8544_CCSRBAR_BASE + 0x40000ULL)
-#define MPC8544_SERIAL0_REGS_BASE  (MPC8544_CCSRBAR_BASE + 0x4500ULL)
-#define MPC8544_SERIAL1_REGS_BASE  (MPC8544_CCSRBAR_BASE + 0x4600ULL)
-#define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + 0x8000ULL)
+#define MPC8544_MPIC_REGS_OFFSET   0x40000ULL
+#define MPC8544_MSI_REGS_OFFSET   0x41600ULL
+#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL
+#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL
+#define MPC8544_PCI_REGS_OFFSET    0x8000ULL
+#define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + \
+                                    MPC8544_PCI_REGS_OFFSET)
 #define MPC8544_PCI_REGS_SIZE      0x1000ULL
 #define MPC8544_PCI_IO             0xE1000000ULL
-#define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe0000ULL)
+#define MPC8544_UTIL_OFFSET        0xe0000ULL
 #define MPC8544_SPIN_BASE          0xEF000000ULL
 
 struct boot_info
@@ -62,25 +67,35 @@ struct boot_info
     uint32_t entry;
 };
 
-static void pci_map_create(void *fdt, uint32_t *pci_map, uint32_t mpic)
+static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
+                                int nr_slots, int *len)
 {
-    int i;
-    const uint32_t tmp[] = {
-                             /* IDSEL 0x11 J17 Slot 1 */
-                             0x8800, 0x0, 0x0, 0x1, mpic, 0x2, 0x1,
-                             0x8800, 0x0, 0x0, 0x2, mpic, 0x3, 0x1,
-                             0x8800, 0x0, 0x0, 0x3, mpic, 0x4, 0x1,
-                             0x8800, 0x0, 0x0, 0x4, mpic, 0x1, 0x1,
-
-                             /* IDSEL 0x12 J16 Slot 2 */
-                             0x9000, 0x0, 0x0, 0x1, mpic, 0x3, 0x1,
-                             0x9000, 0x0, 0x0, 0x2, mpic, 0x4, 0x1,
-                             0x9000, 0x0, 0x0, 0x3, mpic, 0x2, 0x1,
-                             0x9000, 0x0, 0x0, 0x4, mpic, 0x1, 0x1,
-                           };
-    for (i = 0; i < (7 * 8); i++) {
-        pci_map[i] = cpu_to_be32(tmp[i]);
+    int i = 0;
+    int slot;
+    int pci_irq;
+    int host_irq;
+    int last_slot = first_slot + nr_slots;
+    uint32_t *pci_map;
+
+    *len = nr_slots * 4 * 7 * sizeof(uint32_t);
+    pci_map = g_malloc(*len);
+
+    for (slot = first_slot; slot < last_slot; slot++) {
+        for (pci_irq = 0; pci_irq < 4; pci_irq++) {
+            pci_map[i++] = cpu_to_be32(slot << 11);
+            pci_map[i++] = cpu_to_be32(0x0);
+            pci_map[i++] = cpu_to_be32(0x0);
+            pci_map[i++] = cpu_to_be32(pci_irq + 1);
+            pci_map[i++] = cpu_to_be32(mpic);
+            host_irq = ppce500_pci_map_irq_slot(slot, pci_irq);
+            pci_map[i++] = cpu_to_be32(host_irq + 1);
+            pci_map[i++] = cpu_to_be32(0x1);
+        }
     }
+
+    assert((i * sizeof(uint32_t)) == *len);
+
+    return pci_map;
 }
 
 static void dt_serial_create(void *fdt, unsigned long long offset,
@@ -124,9 +139,12 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     char soc[128];
     char mpic[128];
     uint32_t mpic_ph;
+    uint32_t msi_ph;
     char gutil[128];
     char pci[128];
-    uint32_t pci_map[7 * 8];
+    char msi[128];
+    uint32_t *pci_map = NULL;
+    int len;
     uint32_t pci_ranges[14] =
         {
             0x2000000, 0x0, 0xc0000000,
@@ -267,13 +285,12 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     /* XXX should contain a reasonable value */
     qemu_devtree_setprop_cell(fdt, soc, "bus-frequency", 0);
 
-    snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc,
-             MPC8544_MPIC_REGS_BASE - MPC8544_CCSRBAR_BASE);
+    snprintf(mpic, sizeof(mpic), "%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET);
     qemu_devtree_add_subnode(fdt, mpic);
     qemu_devtree_setprop_string(fdt, mpic, "device_type", "open-pic");
     qemu_devtree_setprop_string(fdt, mpic, "compatible", "chrp,open-pic");
-    qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_BASE -
-                               MPC8544_CCSRBAR_BASE, 0x40000);
+    qemu_devtree_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET,
+                               0x40000);
     qemu_devtree_setprop_cell(fdt, mpic, "#address-cells", 0);
     qemu_devtree_setprop_cell(fdt, mpic, "#interrupt-cells", 2);
     mpic_ph = qemu_devtree_alloc_phandle(fdt);
@@ -286,19 +303,37 @@ static int ppce500_load_device_tree(CPUPPCState *env,
      * device it finds in the dt as serial output device. And we generate
      * devices in reverse order to the dt.
      */
-    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_BASE - MPC8544_CCSRBAR_BASE,
+    dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
                      soc, mpic, "serial1", 1, false);
-    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_BASE - MPC8544_CCSRBAR_BASE,
+    dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
                      soc, mpic, "serial0", 0, true);
 
     snprintf(gutil, sizeof(gutil), "%s/global-utilities@%llx", soc,
-             MPC8544_UTIL_BASE - MPC8544_CCSRBAR_BASE);
+             MPC8544_UTIL_OFFSET);
     qemu_devtree_add_subnode(fdt, gutil);
     qemu_devtree_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts");
-    qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_BASE -
-                               MPC8544_CCSRBAR_BASE, 0x1000);
+    qemu_devtree_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000);
     qemu_devtree_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0);
 
+    /*
+     * MSI node.  "soc" is used as a ready-made path prefix for the pic and
+     * global-utilities nodes ("%s/..."), so no extra leading '/' here.
+     */
+    snprintf(msi, sizeof(msi), "%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET);
+    qemu_devtree_add_subnode(fdt, msi);
+    qemu_devtree_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi");
+    qemu_devtree_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200);
+    msi_ph = qemu_devtree_alloc_phandle(fdt);
+    qemu_devtree_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100);
+    qemu_devtree_setprop_phandle(fdt, msi, "interrupt-parent", mpic);
+    /* the mpic node declares #interrupt-cells = 2, hence pairs of cells: */
+    /* eight entries, 0xe0 through 0xe7, each with a second cell of 0 */
+    qemu_devtree_setprop_cells(fdt, msi, "interrupts",
+        0xe0, 0x0, 0xe1, 0x0, 0xe2, 0x0, 0xe3, 0x0,
+        0xe4, 0x0, 0xe5, 0x0, 0xe6, 0x0, 0xe7, 0x0);
+    qemu_devtree_setprop_cell(fdt, msi, "phandle", msi_ph);
+    qemu_devtree_setprop_cell(fdt, msi, "linux,phandle", msi_ph);
+
     snprintf(pci, sizeof(pci), "/pci@%llx", MPC8544_PCI_REGS_BASE);
     qemu_devtree_add_subnode(fdt, pci);
     qemu_devtree_setprop_cell(fdt, pci, "cell-index", 0);
@@ -306,14 +341,17 @@ static int ppce500_load_device_tree(CPUPPCState *env,
     qemu_devtree_setprop_string(fdt, pci, "device_type", "pci");
     qemu_devtree_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0,
                                0x0, 0x7);
-    pci_map_create(fdt, pci_map, qemu_devtree_get_phandle(fdt, mpic));
-    qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, sizeof(pci_map));
+    pci_map = pci_map_create(fdt, qemu_devtree_get_phandle(fdt, mpic),
+                             params->pci_first_slot, params->pci_nr_slots,
+                             &len);
+    qemu_devtree_setprop(fdt, pci, "interrupt-map", pci_map, len);
     qemu_devtree_setprop_phandle(fdt, pci, "interrupt-parent", mpic);
     qemu_devtree_setprop_cells(fdt, pci, "interrupts", 24, 2);
     qemu_devtree_setprop_cells(fdt, pci, "bus-range", 0, 255);
     for (i = 0; i < 14; i++) {
         pci_ranges[i] = cpu_to_be32(pci_ranges[i]);
     }
+    qemu_devtree_setprop_cell(fdt, pci, "fsl,msi", msi_ph);
     qemu_devtree_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges));
     qemu_devtree_setprop_cells(fdt, pci, "reg", MPC8544_PCI_REGS_BASE >> 32,
                                MPC8544_PCI_REGS_BASE, 0, 0x1000);
@@ -340,6 +378,7 @@ done:
     ret = fdt_size;
 
 out:
+    g_free(pci_map);
 
     return ret;
 }
@@ -417,11 +456,14 @@ void ppce500_init(PPCE500Params *params)
     target_ulong dt_base = 0;
     target_ulong initrd_base = 0;
     target_long initrd_size=0;
-    int i=0;
+    int i = 0, j, k;
     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
     CPUPPCState *firstenv = NULL;
+    MemoryRegion *ccsr_addr_space;
+    SysBusDevice *s;
+    PPCE500CCSRState *ccsr;
 
     /* Setup CPUs */
     if (params->cpu_model == NULL) {
@@ -450,7 +492,8 @@ void ppce500_init(PPCE500Params *params)
         irqs[i][OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
         irqs[i][OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
         env->spr[SPR_BOOKE_PIR] = env->cpu_index = i;
-        env->mpic_cpu_base = MPC8544_MPIC_REGS_BASE + 0x20000;
+        env->mpic_cpu_base = MPC8544_CCSRBAR_BASE +
+                              MPC8544_MPIC_REGS_OFFSET + 0x20000;
 
         ppc_booke_timers_init(env, 400000000, PPC_TIMER_E500);
 
@@ -477,35 +520,69 @@ void ppce500_init(PPCE500Params *params)
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(address_space_mem, 0, ram);
 
+    dev = qdev_create(NULL, "e500-ccsr");
+    object_property_add_child(qdev_get_machine(), "e500-ccsr",
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+    ccsr = CCSR(dev);
+    ccsr_addr_space = &ccsr->ccsr_space;
+    memory_region_add_subregion(address_space_mem, MPC8544_CCSRBAR_BASE,
+                                ccsr_addr_space);
+
     /* MPIC */
-    mpic = mpic_init(address_space_mem, MPC8544_MPIC_REGS_BASE,
-                     smp_cpus, irqs, NULL);
+    mpic = g_new(qemu_irq, 256);
+    dev = qdev_create(NULL, "openpic");
+    qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus);
+    qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_FSL_MPIC_20);
+    qdev_init_nofail(dev);
+    s = sysbus_from_qdev(dev);
+
+    k = 0;
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_connect_irq(s, k++, irqs[i][j]);
+        }
+    }
 
-    if (!mpic) {
-        cpu_abort(env, "MPIC failed to initialize\n");
+    for (i = 0; i < 256; i++) {
+        mpic[i] = qdev_get_gpio_in(dev, i);
     }
 
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_MPIC_REGS_OFFSET,
+                                s->mmio[0].memory);
+
     /* Serial */
     if (serial_hds[0]) {
-        serial_mm_init(address_space_mem, MPC8544_SERIAL0_REGS_BASE,
-                       0, mpic[12+26], 399193,
+        serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET,
+                       0, mpic[42], 399193,
                        serial_hds[0], DEVICE_BIG_ENDIAN);
     }
 
     if (serial_hds[1]) {
-        serial_mm_init(address_space_mem, MPC8544_SERIAL1_REGS_BASE,
-                       0, mpic[12+26], 399193,
+        serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET,
+                       0, mpic[42], 399193,
                        serial_hds[1], DEVICE_BIG_ENDIAN);
     }
 
     /* General Utility device */
-    sysbus_create_simple("mpc8544-guts", MPC8544_UTIL_BASE, NULL);
+    dev = qdev_create(NULL, "mpc8544-guts");
+    qdev_init_nofail(dev);
+    s = SYS_BUS_DEVICE(dev);
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET,
+                                sysbus_mmio_get_region(s, 0));
 
     /* PCI */
-    dev = sysbus_create_varargs("e500-pcihost", MPC8544_PCI_REGS_BASE,
-                                mpic[pci_irq_nrs[0]], mpic[pci_irq_nrs[1]],
-                                mpic[pci_irq_nrs[2]], mpic[pci_irq_nrs[3]],
-                                NULL);
+    dev = qdev_create(NULL, "e500-pcihost");
+    qdev_prop_set_uint32(dev, "first_slot", params->pci_first_slot);
+    qdev_init_nofail(dev);
+    s = SYS_BUS_DEVICE(dev);
+    sysbus_connect_irq(s, 0, mpic[pci_irq_nrs[0]]);
+    sysbus_connect_irq(s, 1, mpic[pci_irq_nrs[1]]);
+    sysbus_connect_irq(s, 2, mpic[pci_irq_nrs[2]]);
+    sysbus_connect_irq(s, 3, mpic[pci_irq_nrs[3]]);
+    memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET,
+                                sysbus_mmio_get_region(s, 0));
+
     pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
     if (!pci_bus)
         printf("couldn't create PCI controller!\n");
@@ -578,3 +655,33 @@ void ppce500_init(PPCE500Params *params)
         kvmppc_init();
     }
 }
+
+static int e500_ccsr_initfn(SysBusDevice *dev)
+{
+    PPCE500CCSRState *ccsr;
+
+    ccsr = CCSR(dev);
+    memory_region_init(&ccsr->ccsr_space, "e500-ccsr",
+                       MPC8544_CCSRBAR_SIZE);
+    return 0;
+}
+
+static void e500_ccsr_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+    k->init = e500_ccsr_initfn;
+}
+
+static const TypeInfo e500_ccsr_info = {
+    .name          = TYPE_CCSR,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(PPCE500CCSRState),
+    .class_init    = e500_ccsr_class_init,
+};
+
+static void e500_register_types(void)
+{
+    type_register_static(&e500_ccsr_info);
+}
+
+type_init(e500_register_types)
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 7ae87f4e21..f5ff27385b 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -9,6 +9,8 @@ typedef struct PPCE500Params {
     const char *kernel_cmdline;
     const char *initrd_filename;
     const char *cpu_model;
+    int pci_first_slot;
+    int pci_nr_slots;
 
     /* e500-specific params */
 
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 4cfb94061a..2992bd9794 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -14,6 +14,7 @@
 #include "e500.h"
 #include "../boards.h"
 #include "device_tree.h"
+#include "hw/pci.h"
 
 static void e500plat_fixup_devtree(PPCE500Params *params, void *fdt)
 {
@@ -40,6 +41,8 @@ static void e500plat_init(QEMUMachineInitArgs *args)
         .kernel_cmdline = kernel_cmdline,
         .initrd_filename = initrd_filename,
         .cpu_model = cpu_model,
+        .pci_first_slot = 0x1,
+        .pci_nr_slots = PCI_SLOT_MAX - 1,
         .fixup_devtree = e500plat_fixup_devtree,
     };
 
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index e651661941..7e1761d20c 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -40,6 +40,8 @@ static void mpc8544ds_init(QEMUMachineInitArgs *args)
         .kernel_cmdline = kernel_cmdline,
         .initrd_filename = initrd_filename,
         .cpu_model = cpu_model,
+        .pci_first_slot = 0x11,
+        .pci_nr_slots = 2,
         .fixup_devtree = mpc8544ds_fixup_devtree,
     };
 
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index c1ff9d7c31..2bf3094e9f 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -67,6 +67,7 @@
 #include "hw/usb.h"
 #include "blockdev.h"
 #include "exec-memory.h"
+#include "sysbus.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
@@ -141,7 +142,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     char *filename;
     qemu_irq *pic, **openpic_irqs;
     MemoryRegion *unin_memory = g_new(MemoryRegion, 1);
-    int linux_boot, i;
+    int linux_boot, i, j, k;
     MemoryRegion *ram = g_new(MemoryRegion, 1), *bios = g_new(MemoryRegion, 1);
     hwaddr kernel_base, initrd_base, cmdline_base = 0;
     long kernel_size, initrd_size;
@@ -156,6 +157,8 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     void *fw_cfg;
     void *dbdma;
     int machine_arch;
+    SysBusDevice *s;
+    DeviceState *dev;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -320,7 +323,25 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
             exit(1);
         }
     }
-    pic = openpic_init(&pic_mem, smp_cpus, openpic_irqs, NULL);
+
+    pic = g_new(qemu_irq, 64);
+
+    dev = qdev_create(NULL, "openpic");
+    qdev_prop_set_uint32(dev, "model", OPENPIC_MODEL_RAVEN);
+    qdev_init_nofail(dev);
+    s = sysbus_from_qdev(dev);
+    pic_mem = s->mmio[0].memory;
+    k = 0;
+    for (i = 0; i < smp_cpus; i++) {
+        for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+            sysbus_connect_irq(s, k++, openpic_irqs[i][j]);
+        }
+    }
+
+    for (i = 0; i < 64; i++) {
+        pic[i] = qdev_get_gpio_in(dev, i);
+    }
+
     if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) {
         /* 970 gets a U3 bus */
         pci_bus = pci_pmac_u3_init(pic, get_system_memory(), get_system_io());
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 39022aada0..9bffbb9f87 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -15,9 +15,11 @@
  */
 
 #include "hw.h"
+#include "hw/ppc/e500-ccsr.h"
 #include "pci/pci.h"
 #include "pci/pci_host.h"
 #include "bswap.h"
+#include "ppce500_pci.h"
 
 #ifdef DEBUG_PCI
 #define pci_debug(fmt, ...) fprintf(stderr, fmt, ## __VA_ARGS__)
@@ -86,12 +88,26 @@ struct PPCE500PCIState {
     struct pci_inbound pib[PPCE500_PCI_NR_PIBS];
     uint32_t gasket_time;
     qemu_irq irq[4];
+    uint32_t first_slot;
     /* mmio maps */
     MemoryRegion container;
     MemoryRegion iomem;
     MemoryRegion pio;
 };
 
+#define TYPE_PPC_E500_PCI_BRIDGE "e500-host-bridge"
+#define PPC_E500_PCI_BRIDGE(obj) \
+    OBJECT_CHECK(PPCE500PCIBridgeState, (obj), TYPE_PPC_E500_PCI_BRIDGE)
+
+struct PPCE500PCIBridgeState {
+    /*< private >*/
+    PCIDevice parent;
+    /*< public >*/
+
+    MemoryRegion bar0;
+};
+
+typedef struct PPCE500PCIBridgeState PPCE500PCIBridgeState;
 typedef struct PPCE500PCIState PPCE500PCIState;
 
 static uint64_t pci_reg_read4(void *opaque, hwaddr addr,
@@ -238,17 +254,10 @@ static const MemoryRegionOps e500_pci_reg_ops = {
 
 static int mpc85xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
 {
-    int devno = pci_dev->devfn >> 3, ret = 0;
+    int devno = pci_dev->devfn >> 3;
+    int ret;
 
-    switch (devno) {
-        /* Two PCI slot */
-        case 0x11:
-        case 0x12:
-            ret = (irq_num + devno - 0x10) % 4;
-            break;
-        default:
-            printf("Error:%s:unknown dev number\n", __func__);
-    }
+    ret = ppce500_pci_map_irq_slot(devno, irq_num);
 
     pci_debug("%s: devfn %x irq %d -> %d  devno:%x\n", __func__,
            pci_dev->devfn, irq_num, ret, devno);
@@ -310,6 +319,24 @@ static const VMStateDescription vmstate_ppce500_pci = {
 
 #include "exec-memory.h"
 
+static int e500_pcihost_bridge_initfn(PCIDevice *d)
+{
+    PPCE500PCIBridgeState *b = PPC_E500_PCI_BRIDGE(d);
+    PPCE500CCSRState *ccsr = CCSR(container_get(qdev_get_machine(),
+                                  "/e500-ccsr"));
+
+    pci_config_set_class(d->config, PCI_CLASS_BRIDGE_PCI);
+    d->config[PCI_HEADER_TYPE] =
+        (d->config[PCI_HEADER_TYPE] & PCI_HEADER_TYPE_MULTI_FUNCTION) |
+        PCI_HEADER_TYPE_BRIDGE;
+
+    memory_region_init_alias(&b->bar0, "e500-pci-bar0", &ccsr->ccsr_space,
+                             0, int128_get64(ccsr->ccsr_space.size));
+    pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &b->bar0);
+
+    return 0;
+}
+
 static int e500_pcihost_initfn(SysBusDevice *dev)
 {
     PCIHostState *h;
@@ -329,7 +356,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
 
     b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
                          mpc85xx_pci_map_irq, s->irq, address_space_mem,
-                         &s->pio, PCI_DEVFN(0x11, 0), 4);
+                         &s->pio, PCI_DEVFN(s->first_slot, 0), 4);
     h->bus = b;
 
     pci_create_simple(b, 0, "e500-host-bridge");
@@ -355,6 +382,7 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data)
     DeviceClass *dc = DEVICE_CLASS(klass);
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
+    k->init = e500_pcihost_bridge_initfn;
     k->vendor_id = PCI_VENDOR_ID_FREESCALE;
     k->device_id = PCI_DEVICE_ID_MPC8533E;
     k->class_id = PCI_CLASS_PROCESSOR_POWERPC;
@@ -364,16 +392,22 @@ static void e500_host_bridge_class_init(ObjectClass *klass, void *data)
 static const TypeInfo e500_host_bridge_info = {
     .name          = "e500-host-bridge",
     .parent        = TYPE_PCI_DEVICE,
-    .instance_size = sizeof(PCIDevice),
+    .instance_size = sizeof(PPCE500PCIBridgeState),
     .class_init    = e500_host_bridge_class_init,
 };
 
+static Property pcihost_properties[] = {
+    DEFINE_PROP_UINT32("first_slot", PPCE500PCIState, first_slot, 0x11),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
 static void e500_pcihost_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
     k->init = e500_pcihost_initfn;
+    dc->props = pcihost_properties;
     dc->vmsd = &vmstate_ppce500_pci;
 }
 
diff --git a/hw/ppce500_pci.h b/hw/ppce500_pci.h
new file mode 100644
index 0000000000..61f773ef30
--- /dev/null
+++ b/hw/ppce500_pci.h
@@ -0,0 +1,9 @@
+#ifndef PPCE500_PCI_H
+#define PPCE500_PCI_H
+
+static inline int ppce500_pci_map_irq_slot(int devno, int irq_num)
+{
+    return (devno + irq_num) % 4; /* offset irq_num by devno, modulo 4 */
+}
+
+#endif
diff --git a/hw/puv3.c b/hw/puv3.c
index 764799cff4..3d7734936b 100644
--- a/hw/puv3.c
+++ b/hw/puv3.c
@@ -122,7 +122,6 @@ static QEMUMachine puv3_machine = {
     .desc = "PKUnity Version-3 based on UniCore32",
     .init = puv3_init,
     .is_default = 1,
-    .use_scsi = 0,
 };
 
 static void puv3_machine_init(void)
diff --git a/hw/realview.c b/hw/realview.c
index 149bb562af..5fbdcbf2b0 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -364,14 +364,14 @@ static QEMUMachine realview_eb_machine = {
     .name = "realview-eb",
     .desc = "ARM RealView Emulation Baseboard (ARM926EJ-S)",
     .init = realview_eb_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine realview_eb_mpcore_machine = {
     .name = "realview-eb-mpcore",
     .desc = "ARM RealView Emulation Baseboard (ARM11MPCore)",
     .init = realview_eb_mpcore_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -385,7 +385,7 @@ static QEMUMachine realview_pbx_a9_machine = {
     .name = "realview-pbx-a9",
     .desc = "ARM RealView Platform Baseboard Explore for Cortex-A9",
     .init = realview_pbx_a9_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index ca1bb09816..7aca0c4aad 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -314,21 +314,6 @@ static void s390_init(QEMUMachineInitArgs *args)
         qdev_set_nic_properties(dev, nd);
         qdev_init_nofail(dev);
     }
-
-    /* Create VirtIO disk drives */
-    for(i = 0; i < MAX_BLK_DEVS; i++) {
-        DriveInfo *dinfo;
-        DeviceState *dev;
-
-        dinfo = drive_get(IF_IDE, 0, i);
-        if (!dinfo) {
-            continue;
-        }
-
-        dev = qdev_create((BusState *)s390_bus, "virtio-blk-s390");
-        qdev_prop_set_drive_nofail(dev, "drive", dinfo->bdrv);
-        qdev_init_nofail(dev);
-    }
 }
 
 static QEMUMachine s390_machine = {
@@ -336,6 +321,7 @@ static QEMUMachine s390_machine = {
     .alias = "s390",
     .desc = "VirtIO based S390 machine",
     .init = s390_init,
+    .block_default_type = IF_VIRTIO,
     .no_cdrom = 1,
     .no_floppy = 1,
     .no_serial = 1,
@@ -352,3 +338,4 @@ static void s390_machine_init(void)
 }
 
 machine_init(s390_machine_init);
+
diff --git a/hw/spapr.c b/hw/spapr.c
index b0125a892c..9bd2fd5c8c 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -657,6 +657,36 @@ static void spapr_cpu_reset(void *opaque)
         (spapr->htab_shift - 18);
 }
 
+/*
+ * Create the "spapr-nvram" device on the VIO bus.  If the machine options
+ * carry an "nvram" entry, the block device of that name is attached as
+ * the device's "drive" property; an unknown name is fatal.
+ */
+static void spapr_create_nvram(sPAPREnvironment *spapr)
+{
+    DeviceState *nvram_dev = qdev_create(&spapr->vio_bus->bus, "spapr-nvram");
+    /* look up the optional "nvram" key in the "machine" option group */
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    const char *drivename = opts ? qemu_opt_get(opts, "nvram") : NULL;
+
+    if (drivename) {
+        BlockDriverState *backing = bdrv_find(drivename);
+
+        if (!backing) {
+            fprintf(stderr, "No such block device \"%s\" for nvram\n",
+                    drivename);
+            exit(1);
+        }
+        qdev_prop_set_drive_nofail(nvram_dev, "drive", backing);
+    }
+
+    /* initialise only after the optional drive property has been set */
+    qdev_init_nofail(nvram_dev);
+
+    /* keep a handle on the device in the machine-wide sPAPR state */
+    spapr->nvram = (struct sPAPRNVRAM *)nvram_dev;
+}
+
 /* Returns whether we want to use VGA or not */
 static int spapr_vga_init(PCIBus *pci_bus)
 {
@@ -801,7 +831,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 
     /* Set up Interrupt Controller */
     spapr->icp = xics_system_init(XICS_IRQS);
-    spapr->next_irq = 16;
+    spapr->next_irq = XICS_IRQ_BASE;
 
     /* Set up EPOW events infrastructure */
     spapr_events_init(spapr);
@@ -818,6 +848,9 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
         }
     }
 
+    /* We always have at least the nvram device on VIO */
+    spapr_create_nvram(spapr);
+
     /* Set up PCI */
     spapr_pci_rtas_init();
 
@@ -924,9 +957,9 @@ static QEMUMachine spapr_machine = {
     .desc = "pSeries Logical Partition (PAPR compliant)",
     .init = ppc_spapr_init,
     .reset = ppc_spapr_reset,
+    .block_default_type = IF_SCSI,
     .max_cpus = MAX_CPUS,
     .no_parallel = 1,
-    .use_scsi = 1,
 };
 
 static void spapr_machine_init(void)
diff --git a/hw/spapr.h b/hw/spapr.h
index efe7f5758f..600722f132 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -6,11 +6,13 @@
 
 struct VIOsPAPRBus;
 struct sPAPRPHBState;
+struct sPAPRNVRAM;
 struct icp_state;
 
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
+    struct sPAPRNVRAM *nvram;
     struct icp_state *icp;
 
     hwaddr ram_limit;
@@ -320,7 +322,7 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val)
 typedef void (*spapr_rtas_fn)(sPAPREnvironment *spapr, uint32_t token,
                               uint32_t nargs, target_ulong args,
                               uint32_t nret, target_ulong rets);
-void spapr_rtas_register(const char *name, spapr_rtas_fn fn);
+int spapr_rtas_register(const char *name, spapr_rtas_fn fn);
 target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
                              uint32_t token, uint32_t nargs, target_ulong args,
                              uint32_t nret, target_ulong rets);
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 02d78ccf28..3011b251d3 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -120,6 +120,12 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
 {
     sPAPRTCETable *tcet;
 
+    if (spapr_tce_find_by_liobn(liobn)) {
+        fprintf(stderr, "Attempted to create TCE table with duplicate"
+                " LIOBN 0x%x\n", liobn);
+        return NULL;
+    }
+
     if (!window_size) {
         return NULL;
     }
diff --git a/hw/spapr_nvram.c b/hw/spapr_nvram.c
new file mode 100644
index 0000000000..512bb8d5d1
--- /dev/null
+++ b/hw/spapr_nvram.c
@@ -0,0 +1,196 @@
+/*
+ * QEMU sPAPR NVRAM emulation
+ *
+ * Copyright (C) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <sys/mman.h>
+#include <libfdt.h>
+
+#include "device_tree.h"
+#include "hw/sysbus.h"
+#include "hw/spapr.h"
+#include "hw/spapr_vio.h"
+
+typedef struct sPAPRNVRAM {
+    VIOsPAPRDevice sdev;
+    uint32_t size;
+    uint8_t *buf;
+    BlockDriverState *drive;
+} sPAPRNVRAM;
+
+#define MIN_NVRAM_SIZE 8192
+#define DEFAULT_NVRAM_SIZE 65536
+#define MAX_NVRAM_SIZE (UINT16_MAX * 16)
+
+/*
+ * RTAS nvram-fetch: copy args[2] bytes from NVRAM offset args[0] to guest
+ * address args[1].  rets[0] is the status, rets[1] the byte count (0 on error).
+ */
+static void rtas_nvram_fetch(sPAPREnvironment *spapr, uint32_t token,
+                             uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    sPAPRNVRAM *nvram = spapr->nvram;
+    hwaddr offset, buffer, len;
+    int alen;
+    void *membuf;
+
+    if ((nargs != 3) || (nret != 2)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+    if (!nvram) {
+        rtas_st(rets, 0, -1);
+        rtas_st(rets, 1, 0);
+        return;
+    }
+
+    offset = rtas_ld(args, 0);
+    buffer = rtas_ld(args, 1);
+    len = rtas_ld(args, 2);
+    /* reject ranges that wrap around or run past the end of the NVRAM */
+    if (((offset + len) < offset) || ((offset + len) > nvram->size)) {
+        rtas_st(rets, 0, -3);
+        rtas_st(rets, 1, 0);
+        return;
+    }
+
+    membuf = cpu_physical_memory_map(buffer, &len, 1);
+    if (nvram->drive) {
+        alen = bdrv_pread(nvram->drive, offset, membuf, len);
+    } else {
+        assert(nvram->buf);
+        memcpy(membuf, nvram->buf + offset, len);
+        alen = len;
+    }
+    cpu_physical_memory_unmap(membuf, len, 1, len);
+    /* check the sign first: a negative alen must not be compared as unsigned */
+    rtas_st(rets, 0, ((alen < 0) || ((hwaddr)alen < len)) ? -1 : 0);
+    rtas_st(rets, 1, (alen < 0) ? 0 : alen);
+}
+
+/*
+ * RTAS nvram-store: copy args[2] bytes from guest address args[1] into the
+ * NVRAM at offset args[0].  On completion rets[0] is the status and rets[1]
+ * the number of bytes written (0 if the backend reported an error).
+ */
+static void rtas_nvram_store(sPAPREnvironment *spapr, uint32_t token,
+                             uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    sPAPRNVRAM *nvram = spapr->nvram;
+    hwaddr offset, buffer, len;
+    int alen;
+    void *membuf;
+
+    if ((nargs != 3) || (nret != 2)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+    if (!nvram) {
+        rtas_st(rets, 0, -1);
+        return;
+    }
+
+    offset = rtas_ld(args, 0);
+    buffer = rtas_ld(args, 1);
+    len = rtas_ld(args, 2);
+    /* reject ranges that wrap around or run past the end of the NVRAM */
+    if (((offset + len) < offset) || ((offset + len) > nvram->size)) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+    membuf = cpu_physical_memory_map(buffer, &len, 0);
+    if (nvram->drive) {
+        alen = bdrv_pwrite(nvram->drive, offset, membuf, len);
+    } else {
+        assert(nvram->buf);
+        memcpy(nvram->buf + offset, membuf, len);
+        alen = len;
+    }
+    cpu_physical_memory_unmap(membuf, len, 0, len);
+    /* check the sign first: a negative alen must not be compared as unsigned */
+    rtas_st(rets, 0, ((alen < 0) || ((hwaddr)alen < len)) ? -1 : 0);
+    rtas_st(rets, 1, (alen < 0) ? 0 : alen);
+}
+
+/* Determine the NVRAM size, allocate default storage, register RTAS calls */
+static int spapr_nvram_init(VIOsPAPRDevice *dev)
+{
+    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+    /* validate in 64 bits: nvram->size is uint32_t and would truncate */
+    int64_t size = nvram->drive ? bdrv_getlength(nvram->drive)
+                                : DEFAULT_NVRAM_SIZE;
+
+    if ((size < MIN_NVRAM_SIZE) || (size > MAX_NVRAM_SIZE)) {
+        fprintf(stderr, "spapr-nvram must be between %d and %d bytes in size\n",
+                MIN_NVRAM_SIZE, MAX_NVRAM_SIZE);
+        return -1;
+    }
+    nvram->size = size;
+    if (!nvram->drive) {
+        nvram->buf = g_malloc0(nvram->size);
+    }
+
+    spapr_rtas_register("nvram-fetch", rtas_nvram_fetch);
+    spapr_rtas_register("nvram-store", rtas_nvram_store);
+    return 0;
+}
+
+static int spapr_nvram_devnode(VIOsPAPRDevice *dev, void *fdt, int node_off)
+{
+    sPAPRNVRAM *nvram = (sPAPRNVRAM *)dev;
+
+    return fdt_setprop_cell(fdt, node_off, "#bytes", nvram->size);
+}
+
+static Property spapr_nvram_properties[] = {
+    DEFINE_SPAPR_PROPERTIES(sPAPRNVRAM, sdev),
+    DEFINE_PROP_DRIVE("drive", sPAPRNVRAM, drive),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void spapr_nvram_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VIOsPAPRDeviceClass *k = VIO_SPAPR_DEVICE_CLASS(klass);
+
+    k->init = spapr_nvram_init;
+    k->devnode = spapr_nvram_devnode;
+    k->dt_name = "nvram";
+    k->dt_type = "nvram";
+    k->dt_compatible = "qemu,spapr-nvram";
+    dc->props = spapr_nvram_properties;
+}
+
+static const TypeInfo spapr_nvram_type_info = {
+    .name          = "spapr-nvram",
+    .parent        = TYPE_VIO_SPAPR_DEVICE,
+    .instance_size = sizeof(sPAPRNVRAM),
+    .class_init    = spapr_nvram_class_init,
+};
+
+static void spapr_nvram_register_types(void)
+{
+    type_register_static(&spapr_nvram_type_info);
+}
+
+type_init(spapr_nvram_register_types)
diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index 6d5c48a740..e618c2db53 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -242,7 +242,7 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
     return H_PARAMETER;
 }
 
-void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
+int spapr_rtas_register(const char *name, spapr_rtas_fn fn)
 {
     int i;
 
@@ -258,7 +258,7 @@ void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
     rtas_next->name = name;
     rtas_next->fn = fn;
 
-    rtas_next++;
+    return (rtas_next++ - rtas_table) + TOKEN_BASE;
 }
 
 int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
@@ -301,7 +301,7 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr rtas_addr,
     for (i = 0; i < TOKEN_MAX; i++) {
         struct rtas_call *call = &rtas_table[i];
 
-        if (!call->fn) {
+        if (!call->name) {
             continue;
         }
 
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 1a786762aa..52cf82b681 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1426,7 +1426,7 @@ static QEMUMachine ss5_machine = {
     .name = "SS-5",
     .desc = "Sun4m platform, SPARCstation 5",
     .init = ss5_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .is_default = 1,
 };
 
@@ -1434,7 +1434,7 @@ static QEMUMachine ss10_machine = {
     .name = "SS-10",
     .desc = "Sun4m platform, SPARCstation 10",
     .init = ss10_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1442,7 +1442,7 @@ static QEMUMachine ss600mp_machine = {
     .name = "SS-600MP",
     .desc = "Sun4m platform, SPARCserver 600MP",
     .init = ss600mp_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1450,7 +1450,7 @@ static QEMUMachine ss20_machine = {
     .name = "SS-20",
     .desc = "Sun4m platform, SPARCstation 20",
     .init = ss20_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -1458,35 +1458,35 @@ static QEMUMachine voyager_machine = {
     .name = "Voyager",
     .desc = "Sun4m platform, SPARCstation Voyager",
     .init = vger_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine ss_lx_machine = {
     .name = "LX",
     .desc = "Sun4m platform, SPARCstation LX",
     .init = ss_lx_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine ss4_machine = {
     .name = "SS-4",
     .desc = "Sun4m platform, SPARCstation 4",
     .init = ss4_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine scls_machine = {
     .name = "SPARCClassic",
     .desc = "Sun4m platform, SPARCClassic",
     .init = scls_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine sbook_machine = {
     .name = "SPARCbook",
     .desc = "Sun4m platform, SPARCbook",
     .init = sbook_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static const struct sun4d_hwdef sun4d_hwdefs[] = {
@@ -1709,7 +1709,7 @@ static QEMUMachine ss1000_machine = {
     .name = "SS-1000",
     .desc = "Sun4d platform, SPARCserver 1000",
     .init = ss1000_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 8,
 };
 
@@ -1717,7 +1717,7 @@ static QEMUMachine ss2000_machine = {
     .name = "SS-2000",
     .desc = "Sun4d platform, SPARCcenter 2000",
     .init = ss2000_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 20,
 };
 
@@ -1896,7 +1896,7 @@ static QEMUMachine ss2_machine = {
     .name = "SS-2",
     .desc = "Sun4c platform, SPARCstation 2",
     .init = ss2_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void sun4m_register_types(void)
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 41e39d8fb9..f5a742b37f 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -358,14 +358,14 @@ static QEMUMachine versatilepb_machine = {
     .name = "versatilepb",
     .desc = "ARM Versatile/PB (ARM926EJ-S)",
     .init = vpb_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static QEMUMachine versatileab_machine = {
     .name = "versatileab",
     .desc = "ARM Versatile/AB (ARM926EJ-S)",
     .init = vab_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
 };
 
 static void versatile_machine_init(void)
diff --git a/hw/vexpress.c b/hw/vexpress.c
index d93f057bff..e89694c86e 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -477,7 +477,7 @@ static QEMUMachine vexpress_a9_machine = {
     .name = "vexpress-a9",
     .desc = "ARM Versatile Express for Cortex-A9",
     .init = vexpress_a9_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
@@ -485,7 +485,7 @@ static QEMUMachine vexpress_a15_machine = {
     .name = "vexpress-a15",
     .desc = "ARM Versatile Express for Cortex-A15",
     .init = vexpress_a15_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 4,
 };
 
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 45d90ab490..264e58a68b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -275,7 +275,7 @@ static void vfio_enable_intx_kvm(VFIODevice *vdev)
     int ret, argsz;
     int32_t *pfd;
 
-    if (!kvm_irqchip_in_kernel() ||
+    if (!kvm_irqfds_enabled() ||
         vdev->intx.route.mode != PCI_INTX_ENABLED ||
         !kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
         return;
@@ -438,7 +438,8 @@ static int vfio_enable_intx(VFIODevice *vdev)
      * Only conditional to avoid generating error messages on platforms
      * where we won't actually use the result anyway.
      */
-    if (kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
+    if (kvm_irqfds_enabled() &&
+        kvm_check_extension(kvm_state, KVM_CAP_IRQFD_RESAMPLE)) {
         vdev->intx.route = pci_device_route_intx_to_irq(&vdev->pdev,
                                                         vdev->intx.pin);
     }
diff --git a/hw/vga.c b/hw/vga.c
index 6d56f8a5d9..ab40d73100 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -2413,7 +2413,7 @@ void ppm_save(const char *filename, struct DisplaySurface *ds, Error **errp)
     }
     linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
     for (y = 0; y < height; y++) {
-        qemu_pixman_linebuf_fill(linebuf, ds->image, width, y);
+        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
         clearerr(f);
         ret = fwrite(pixman_image_get_data(linebuf), 1,
                      pixman_image_get_stride(linebuf), f);
diff --git a/hw/virtio-blk.h b/hw/virtio-blk.h
index f0740d01af..651a000b9f 100644
--- a/hw/virtio-blk.h
+++ b/hw/virtio-blk.h
@@ -104,7 +104,6 @@ struct VirtIOBlkConf
     BlockConf conf;
     char *serial;
     uint32_t scsi;
-    uint32_t config_wce;
 };
 
 #define DEFINE_VIRTIO_BLK_FEATURES(_state, _field) \
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index e9b722dd96..d0d6a5e816 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -895,7 +895,6 @@ static Property virtio_blk_properties[] = {
 #ifdef __linux__
     DEFINE_PROP_BIT("scsi", VirtIOPCIProxy, blk.scsi, 0, true),
 #endif
-    DEFINE_PROP_BIT("config-wce", VirtIOPCIProxy, blk.config_wce, 0, true),
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
     DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
diff --git a/hw/watchdog.c b/hw/watchdog.c
index b52acedd98..5c82c17d09 100644
--- a/hw/watchdog.c
+++ b/hw/watchdog.c
@@ -66,7 +66,7 @@ int select_watchdog(const char *p)
     QLIST_FOREACH(model, &watchdog_list, entry) {
         if (strcasecmp(model->wdt_name, p) == 0) {
             /* add the device */
-            opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(qemu_find_opts("device"));
             qemu_opt_set(opts, "driver", p);
             return 0;
         }
diff --git a/hw/xics.c b/hw/xics.c
index 1da310653b..55899ce77d 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -26,6 +26,7 @@
  */
 
 #include "hw.h"
+#include "trace.h"
 #include "hw/spapr.h"
 #include "hw/xics.h"
 
@@ -66,6 +67,8 @@ static void icp_check_ipi(struct icp_state *icp, int server)
         return;
     }
 
+    trace_xics_icp_check_ipi(server, ss->mfrr);
+
     if (XISR(ss)) {
         ics_reject(icp->ics, XISR(ss));
     }
@@ -120,11 +123,13 @@ static void icp_set_mfrr(struct icp_state *icp, int server, uint8_t mfrr)
 
 static uint32_t icp_accept(struct icp_server_state *ss)
 {
-    uint32_t xirr;
+    uint32_t xirr = ss->xirr;
 
     qemu_irq_lower(ss->output);
-    xirr = ss->xirr;
     ss->xirr = ss->pending_priority << 24;
+
+    trace_xics_icp_accept(xirr, ss->xirr);
+
     return xirr;
 }
 
@@ -134,6 +139,7 @@ static void icp_eoi(struct icp_state *icp, int server, uint32_t xirr)
 
     /* Send EOI -> ICS */
     ss->xirr = (ss->xirr & ~CPPR_MASK) | (xirr & CPPR_MASK);
+    trace_xics_icp_eoi(server, xirr, ss->xirr);
     ics_eoi(icp->ics, xirr & XISR_MASK);
     if (!XISR(ss)) {
         icp_resend(icp, server);
@@ -144,6 +150,8 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
 {
     struct icp_server_state *ss = icp->ss + server;
 
+    trace_xics_icp_irq(server, nr, priority);
+
     if ((priority >= CPPR(ss))
         || (XISR(ss) && (ss->pending_priority <= priority))) {
         ics_reject(icp->ics, nr);
@@ -153,6 +161,7 @@ static void icp_irq(struct icp_state *icp, int server, int nr, uint8_t priority)
         }
         ss->xirr = (ss->xirr & ~XISR_MASK) | (nr & XISR_MASK);
         ss->pending_priority = priority;
+        trace_xics_icp_raise(ss->xirr, ss->pending_priority);
         qemu_irq_raise(ss->output);
     }
 }
@@ -170,13 +179,13 @@ struct ics_irq_state {
 #define XICS_STATUS_REJECTED           0x4
 #define XICS_STATUS_MASKED_PENDING     0x8
     uint8_t status;
-    bool lsi;
 };
 
 struct ics_state {
     int nr_irqs;
     int offset;
     qemu_irq *qirqs;
+    bool *islsi;
     struct ics_irq_state *irqs;
     struct icp_state *icp;
 };
@@ -217,10 +226,12 @@ static void set_irq_msi(struct ics_state *ics, int srcno, int val)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
+    trace_xics_set_irq_msi(srcno, srcno + ics->offset);
+
     if (val) {
         if (irq->priority == 0xff) {
             irq->status |= XICS_STATUS_MASKED_PENDING;
-            /* masked pending */ ;
+            trace_xics_masked_pending();
         } else  {
             icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
         }
@@ -231,6 +242,7 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
+    trace_xics_set_irq_lsi(srcno, srcno + ics->offset);
     if (val) {
         irq->status |= XICS_STATUS_ASSERTED;
     } else {
@@ -242,9 +254,8 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val)
 static void ics_set_irq(void *opaque, int srcno, int val)
 {
     struct ics_state *ics = (struct ics_state *)opaque;
-    struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->lsi) {
+    if (ics->islsi[srcno]) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -279,7 +290,9 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server,
     irq->priority = priority;
     irq->saved_priority = saved_priority;
 
-    if (irq->lsi) {
+    trace_xics_ics_write_xive(nr, srcno, server, priority);
+
+    if (ics->islsi[srcno]) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -290,6 +303,7 @@ static void ics_reject(struct ics_state *ics, int nr)
 {
     struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
 
+    trace_xics_ics_reject(nr, nr - ics->offset);
     irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */
     irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }
@@ -299,10 +313,8 @@ static void ics_resend(struct ics_state *ics)
     int i;
 
     for (i = 0; i < ics->nr_irqs; i++) {
-        struct ics_irq_state *irq = ics->irqs + i;
-
         /* FIXME: filter by server#? */
-        if (irq->lsi) {
+        if (ics->islsi[i]) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -315,7 +327,9 @@ static void ics_eoi(struct ics_state *ics, int nr)
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->lsi) {
+    trace_xics_ics_eoi(nr);
+
+    if (ics->islsi[srcno]) {
         irq->status &= ~XICS_STATUS_SENT;
     }
 }
@@ -337,7 +351,7 @@ void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi)
 {
     assert(ics_valid_irq(icp->ics, irq));
 
-    icp->ics->irqs[irq - icp->ics->offset].lsi = lsi;
+    icp->ics->islsi[irq - icp->ics->offset] = lsi;
 }
 
 static target_ulong h_cppr(PowerPCCPU *cpu, sPAPREnvironment *spapr,
@@ -495,16 +509,14 @@ static void xics_reset(void *opaque)
 
     for (i = 0; i < icp->nr_servers; i++) {
         icp->ss[i].xirr = 0;
-        icp->ss[i].pending_priority = 0;
+        icp->ss[i].pending_priority = 0xff;
         icp->ss[i].mfrr = 0xff;
         /* Make all outputs are deasserted */
         qemu_set_irq(icp->ss[i].output, 0);
     }
 
+    memset(ics->irqs, 0, sizeof(struct ics_irq_state) * ics->nr_irqs);
     for (i = 0; i < ics->nr_irqs; i++) {
-        /* Reset everything *except* the type */
-        ics->irqs[i].server = 0;
-        ics->irqs[i].status = 0;
         ics->irqs[i].priority = 0xff;
         ics->irqs[i].saved_priority = 0xff;
     }
@@ -549,8 +561,9 @@ struct icp_state *xics_system_init(int nr_irqs)
 
     ics = g_malloc0(sizeof(*ics));
     ics->nr_irqs = nr_irqs;
-    ics->offset = 16;
+    ics->offset = XICS_IRQ_BASE;
     ics->irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state));
+    ics->islsi = g_malloc0(nr_irqs * sizeof(bool));
 
     icp->ics = ics;
     ics->icp = icp;
diff --git a/hw/xics.h b/hw/xics.h
index 6817268697..c3bf0083e2 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -28,6 +28,7 @@
 #define __XICS_H__
 
 #define XICS_IPI        0x2
+#define XICS_IRQ_BASE   0x10
 
 struct icp_state;
 
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index 1f12a3d1ad..9ca22a4e7d 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -57,6 +57,7 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq,
     DeviceState *dev;
     SysBusDevice *busdev;
     SSIBus *spi;
+    DeviceState *flash_dev;
     int i, j;
     int num_busses =  is_qspi ? NUM_QSPI_BUSSES : 1;
     int num_ss = is_qspi ? NUM_QSPI_FLASHES : NUM_SPI_FLASHES;
@@ -81,11 +82,11 @@ static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq,
         spi = (SSIBus *)qdev_get_child_bus(dev, bus_name);
 
         for (j = 0; j < num_ss; ++j) {
-            dev = ssi_create_slave_no_init(spi, "m25p80");
-            qdev_prop_set_string(dev, "partname", "n25q128");
-            qdev_init_nofail(dev);
+            flash_dev = ssi_create_slave_no_init(spi, "m25p80");
+            qdev_prop_set_string(flash_dev, "partname", "n25q128");
+            qdev_init_nofail(flash_dev);
 
-            cs_line = qdev_get_gpio_in(dev, 0);
+            cs_line = qdev_get_gpio_in(flash_dev, 0);
             sysbus_connect_irq(busdev, i * num_ss + j + 1, cs_line);
         }
     }
@@ -200,7 +201,7 @@ static QEMUMachine zynq_machine = {
     .name = "xilinx-zynq-a9",
     .desc = "Xilinx Zynq Platform Baseboard for Cortex-A9",
     .init = zynq_init,
-    .use_scsi = 1,
+    .block_default_type = IF_SCSI,
     .max_cpus = 1,
     .no_sdcard = 1
 };
diff --git a/main-loop.c b/main-loop.c
index c87624e621..7dba6f6e35 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -432,11 +432,6 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
     return aio_bh_new(qemu_aio_context, cb, opaque);
 }
 
-void qemu_aio_flush(void)
-{
-    aio_flush(qemu_aio_context);
-}
-
 bool qemu_aio_wait(void)
 {
     return aio_poll(qemu_aio_context, true);
diff --git a/pc-bios/README b/pc-bios/README
index 303713099e..eff3de7615 100644
--- a/pc-bios/README
+++ b/pc-bios/README
@@ -17,7 +17,7 @@
 - SLOF (Slimline Open Firmware) is a free IEEE 1275 Open Firmware
   implementation for certain IBM POWER hardware.  The sources are at
   https://github.com/dgibson/SLOF, and the image currently in qemu is
-  built from git tag qemu-slof-20120731.
+  built from git tag qemu-slof-20121018.
 
 - sgabios (the Serial Graphics Adapter option ROM) provides a means for
   legacy x86 software to communicate with an attached serial console as
diff --git a/pc-bios/acpi-dsdt.aml b/pc-bios/acpi-dsdt.aml
index bb3dd83a56..18b4dc1aa5 100644
--- a/pc-bios/acpi-dsdt.aml
+++ b/pc-bios/acpi-dsdt.aml
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index dc9b57ddc9..3eefff4cf8 100644
--- a/pc-bios/bios.bin
+++ b/pc-bios/bios.bin
diff --git a/pc-bios/q35-acpi-dsdt.aml b/pc-bios/q35-acpi-dsdt.aml
new file mode 100644
index 0000000000..8a50559514
--- /dev/null
+++ b/pc-bios/q35-acpi-dsdt.aml
diff --git a/pc-bios/slof.bin b/pc-bios/slof.bin
index 84ba6b83f3..3410f4fff4 100644
--- a/pc-bios/slof.bin
+++ b/pc-bios/slof.bin
diff --git a/pixman b/pixman
-Subproject 97336fad32acf802003855cd8bd6477fa49a12e
+Subproject a5e5179b5624c99c812e9bf6e7b907e355a811e
diff --git a/qemu-aio.h b/qemu-aio.h
index 3889fe97a4..31884a8f16 100644
--- a/qemu-aio.h
+++ b/qemu-aio.h
@@ -162,10 +162,6 @@ void qemu_bh_cancel(QEMUBH *bh);
  */
 void qemu_bh_delete(QEMUBH *bh);
 
-/* Flush any pending AIO operation. This function will block until all
- * outstanding AIO operations have been completed or cancelled. */
-void aio_flush(AioContext *ctx);
-
 /* Return whether there are any pending callbacks from the GSource
  * attached to the AioContext.
  *
@@ -196,7 +192,7 @@ typedef int (AioFlushHandler)(void *opaque);
 
 /* Register a file descriptor and associated callbacks.  Behaves very similarly
  * to qemu_set_fd_handler2.  Unlike qemu_set_fd_handler2, these callbacks will
- * be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ * be invoked when using qemu_aio_wait().
  *
  * Code that invokes AIO completion functions should rely on this function
  * instead of qemu_set_fd_handler[2].
@@ -211,7 +207,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
 /* Register an event notifier and associated callbacks.  Behaves very similarly
  * to event_notifier_set_handler.  Unlike event_notifier_set_handler, these callbacks
- * will be invoked when using either qemu_aio_wait() or qemu_aio_flush().
+ * will be invoked when using qemu_aio_wait().
  *
  * Code that invokes AIO completion functions should rely on this function
  * instead of event_notifier_set_handler.
@@ -228,7 +224,6 @@ GSource *aio_get_g_source(AioContext *ctx);
 
 /* Functions to operate on the main QEMU AioContext.  */
 
-void qemu_aio_flush(void);
 bool qemu_aio_wait(void);
 void qemu_aio_set_event_notifier(EventNotifier *notifier,
                                  EventNotifierHandler *io_read,
diff --git a/qemu-config.c b/qemu-config.c
index aa78fb9ea7..b4ce0d8034 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -579,6 +579,10 @@ static QemuOptsList qemu_machine_opts = {
             .name = "usb",
             .type = QEMU_OPT_BOOL,
             .help = "Set on/off to enable/disable usb",
+        }, {
+            .name = "nvram",
+            .type = QEMU_OPT_STRING,
+            .help = "Drive backing persistent NVRAM",
         },
         { /* End of list */ }
     },
@@ -756,7 +760,7 @@ int qemu_global_option(const char *str)
         return -1;
     }
 
-    opts = qemu_opts_create(&qemu_global_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&qemu_global_opts);
     qemu_opt_set(opts, "driver", driver);
     qemu_opt_set(opts, "property", property);
     qemu_opt_set(opts, "value", str+offset+1);
@@ -843,7 +847,7 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
                 error_free(local_err);
                 goto out;
             }
-            opts = qemu_opts_create(list, NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(list);
             continue;
         }
         if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2) {
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b729..c989a52564 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -294,13 +294,14 @@ static int add_old_style_options(const char *fmt, QEMUOptionParameter *list,
 
 static int img_create(int argc, char **argv)
 {
-    int c, ret = 0;
+    int c;
     uint64_t img_size = -1;
     const char *fmt = "raw";
     const char *base_fmt = NULL;
     const char *filename;
     const char *base_filename = NULL;
     char *options = NULL;
+    Error *local_err = NULL;
 
     for(;;) {
         c = getopt(argc, argv, "F:b:f:he6o:");
@@ -350,23 +351,23 @@ static int img_create(int argc, char **argv)
             error_report("Invalid image size specified! You may use k, M, G or "
                   "T suffixes for ");
             error_report("kilobytes, megabytes, gigabytes and terabytes.");
-            ret = -1;
-            goto out;
+            return 1;
         }
         img_size = (uint64_t)sval;
     }
 
     if (options && is_help_option(options)) {
-        ret = print_block_option_help(filename, fmt);
-        goto out;
+        return print_block_option_help(filename, fmt);
     }
 
-    ret = bdrv_img_create(filename, fmt, base_filename, base_fmt,
-                          options, img_size, BDRV_O_FLAGS);
-out:
-    if (ret) {
+    bdrv_img_create(filename, fmt, base_filename, base_fmt,
+                    options, img_size, BDRV_O_FLAGS, &local_err);
+    if (error_is_set(&local_err)) {
+        error_report("%s", error_get_pretty(local_err));
+        error_free(local_err);
         return 1;
     }
+
     return 0;
 }
 
@@ -1933,7 +1934,7 @@ static int img_resize(int argc, char **argv)
     }
 
     /* Parse size */
-    param = qemu_opts_create(&resize_options, NULL, 0, NULL);
+    param = qemu_opts_create_nofail(&resize_options);
     if (qemu_opt_set(param, BLOCK_OPT_SIZE, size)) {
         /* Error message already printed when size parsing fails */
         ret = -1;
diff --git a/qemu-io.c b/qemu-io.c
index 92cdb2ab9c..1637773302 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -265,6 +265,18 @@ static int do_co_write_zeroes(int64_t offset, int count, int *total)
     }
 }
 
+static int do_write_compressed(char *buf, int64_t offset, int count, int *total)
+{   /* Write buf via bdrv_write_compressed(); 1 on success, <0 on error. */
+    int ret;
+    /* offset/count are shifted right by 9, i.e. divided by 512, below */
+    ret = bdrv_write_compressed(bs, offset >> 9, (uint8_t *)buf, count >> 9);
+    if (ret < 0) {
+        return ret; /* *total is left untouched on failure */
+    }
+    *total = count; /* the whole request is reported as written */
+    return 1;
+}
+
 static int do_load_vmstate(char *buf, int64_t offset, int count, int *total)
 {
     *total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count);
@@ -687,6 +699,7 @@ static void write_help(void)
 " Writes into a segment of the currently open file, using a buffer\n"
 " filled with a set pattern (0xcdcdcdcd).\n"
 " -b, -- write to the VM state rather than the virtual disk\n"
+" -c, -- write compressed data with bdrv_write_compressed\n"
 " -p, -- use bdrv_pwrite to write the file\n"
 " -P, -- use different pattern to fill file\n"
 " -C, -- report statistics in a machine parsable format\n"
@@ -703,7 +716,7 @@ static const cmdinfo_t write_cmd = {
     .cfunc      = write_f,
     .argmin     = 2,
     .argmax     = -1,
-    .args       = "[-bCpqz] [-P pattern ] off len",
+    .args       = "[-bcCpqz] [-P pattern ] off len",
     .oneline    = "writes a number of bytes at a specified offset",
     .help       = write_help,
 };
@@ -712,6 +725,7 @@ static int write_f(int argc, char **argv)
 {
     struct timeval t1, t2;
     int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0;
+    int cflag = 0;
     int c, cnt;
     char *buf = NULL;
     int64_t offset;
@@ -720,11 +734,14 @@ static int write_f(int argc, char **argv)
     int total = 0;
     int pattern = 0xcd;
 
-    while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) {
+    while ((c = getopt(argc, argv, "bcCpP:qz")) != EOF) {
         switch (c) {
         case 'b':
             bflag = 1;
             break;
+        case 'c':
+            cflag = 1;
+            break;
         case 'C':
             Cflag = 1;
             break;
@@ -801,6 +818,8 @@ static int write_f(int argc, char **argv)
         cnt = do_save_vmstate(buf, offset, count, &total);
     } else if (zflag) {
         cnt = do_co_write_zeroes(offset, count, &total);
+    } else if (cflag) {
+        cnt = do_write_compressed(buf, offset, count, &total);
     } else {
         cnt = do_write(buf, offset, count, &total);
     }
@@ -1652,6 +1671,67 @@ static const cmdinfo_t map_cmd = {
        .oneline        = "prints the allocated areas of a file",
 };
 
+static int break_f(int argc, char **argv)
+{   /* "break event tag": argv[1] is the event, argv[2] the tag. */
+    int ret;
+
+    ret = bdrv_debug_breakpoint(bs, argv[1], argv[2]);
+    if (ret < 0) {
+        printf("Could not set breakpoint: %s\n", strerror(-ret));
+    }
+
+    return 0; /* a failure is only printed, never returned */
+}
+
+static const cmdinfo_t break_cmd = { /* "break" command table entry */
+       .name           = "break",
+       .argmin         = 2, /* min == max: exactly "event" and "tag" */
+       .argmax         = 2,
+       .cfunc          = break_f,
+       .args           = "event tag",
+       .oneline        = "sets a breakpoint on event and tags the stopped "
+                         "request as tag",
+};
+
+static int resume_f(int argc, char **argv)
+{   /* "resume tag": ask bdrv_debug_resume() to resume the request argv[1]. */
+    int ret;
+
+    ret = bdrv_debug_resume(bs, argv[1]);
+    if (ret < 0) {
+        printf("Could not resume request: %s\n", strerror(-ret));
+    }
+
+    return 0; /* a failure is only printed, never returned */
+}
+
+static const cmdinfo_t resume_cmd = { /* "resume" command table entry */
+       .name           = "resume",
+       .argmin         = 1, /* min == max: exactly one argument, the tag */
+       .argmax         = 1,
+       .cfunc          = resume_f,
+       .args           = "tag",
+       .oneline        = "resumes the request tagged as tag",
+};
+
+static int wait_break_f(int argc, char **argv)
+{   /* Loop until the request tagged argv[1] is reported as suspended. */
+    while (!bdrv_debug_is_suspended(bs, argv[1])) {
+        qemu_aio_wait(); /* wait for AIO activity, then re-check */
+    }
+
+    return 0;
+}
+
+static const cmdinfo_t wait_break_cmd = { /* "wait_break" command table entry */
+       .name           = "wait_break",
+       .argmin         = 1, /* min == max: exactly one argument, the tag */
+       .argmax         = 1,
+       .cfunc          = wait_break_f,
+       .args           = "tag",
+       .oneline        = "waits for the suspension of a request",
+};
+
 static int abort_f(int argc, char **argv)
 {
     abort();
@@ -1915,6 +1995,9 @@ int main(int argc, char **argv)
     add_command(&discard_cmd);
     add_command(&alloc_cmd);
     add_command(&map_cmd);
+    add_command(&break_cmd);
+    add_command(&resume_cmd);
+    add_command(&wait_break_cmd);
     add_command(&abort_cmd);
 
     add_args_command(init_args_command);
diff --git a/qemu-option.c b/qemu-option.c
index 27891e74e7..94557cfde7 100644
--- a/qemu-option.c
+++ b/qemu-option.c
@@ -602,26 +602,36 @@ static void qemu_opt_del(QemuOpt *opt)
     g_free(opt);
 }
 
-static void opt_set(QemuOpts *opts, const char *name, const char *value,
-                    bool prepend, Error **errp)
+static bool opts_accepts_any(const QemuOpts *opts)
+{   /* True when the list's descriptor table is empty: any name is allowed. */
+    return opts->list->desc[0].name == NULL;
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+                                            const char *name)
 {
-    QemuOpt *opt;
-    const QemuOptDesc *desc = opts->list->desc;
-    Error *local_err = NULL;
     int i;
 
     for (i = 0; desc[i].name != NULL; i++) {
         if (strcmp(desc[i].name, name) == 0) {
-            break;
+            return &desc[i];
         }
     }
-    if (desc[i].name == NULL) {
-        if (i == 0) {
-            /* empty list -> allow any */;
-        } else {
-            error_set(errp, QERR_INVALID_PARAMETER, name);
-            return;
-        }
+
+    return NULL;
+}
+
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+                    bool prepend, Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+    Error *local_err = NULL;
+
+    desc = find_desc_by_name(opts->list->desc, name);
+    if (!desc && !opts_accepts_any(opts)) {
+        error_set(errp, QERR_INVALID_PARAMETER, name);
+        return;
     }
 
     opt = g_malloc0(sizeof(*opt));
@@ -632,9 +642,7 @@ static void opt_set(QemuOpts *opts, const char *name, const char *value,
     } else {
         QTAILQ_INSERT_TAIL(&opts->head, opt, next);
     }
-    if (desc[i].name != NULL) {
-        opt->desc = desc+i;
-    }
+    opt->desc = desc;
     if (value) {
         opt->str = g_strdup(value);
     }
@@ -669,30 +677,43 @@ int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val)
 {
     QemuOpt *opt;
     const QemuOptDesc *desc = opts->list->desc;
-    int i;
 
-    for (i = 0; desc[i].name != NULL; i++) {
-        if (strcmp(desc[i].name, name) == 0) {
-            break;
-        }
-    }
-    if (desc[i].name == NULL) {
-        if (i == 0) {
-            /* empty list -> allow any */;
-        } else {
-            qerror_report(QERR_INVALID_PARAMETER, name);
-            return -1;
-        }
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return -1;
     }
 
-    opt = g_malloc0(sizeof(*opt));
     opt->name = g_strdup(name);
     opt->opts = opts;
+    opt->value.boolean = !!val;
+    opt->str = g_strdup(val ? "on" : "off");
     QTAILQ_INSERT_TAIL(&opts->head, opt, next);
-    if (desc[i].name != NULL) {
-        opt->desc = desc+i;
+
+    return 0;
+}
+
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc = opts->list->desc;
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return -1;
     }
-    opt->value.boolean = !!val;
+
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->value.uint = val;
+    opt->str = g_strdup_printf("%" PRId64, val);
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+
     return 0;
 }
 
@@ -781,6 +802,15 @@ QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
     return opts;
 }
 
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list)
+{   /* qemu_opts_create() with no id; errors go to assert_no_error(). */
+    QemuOpts *opts;
+    Error *errp = NULL;
+    opts = qemu_opts_create(list, NULL, 0, &errp); /* id=NULL, fail_if_exists=0 */
+    assert_no_error(errp);
+    return opts;
+}
+
 void qemu_opts_reset(QemuOptsList *list)
 {
     QemuOpts *opts, *next_opts;
@@ -1068,23 +1098,15 @@ void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
     QemuOpt *opt;
     Error *local_err = NULL;
 
-    assert(opts->list->desc[0].name == NULL);
+    assert(opts_accepts_any(opts));
 
     QTAILQ_FOREACH(opt, &opts->head, next) {
-        int i;
-
-        for (i = 0; desc[i].name != NULL; i++) {
-            if (strcmp(desc[i].name, opt->name) == 0) {
-                break;
-            }
-        }
-        if (desc[i].name == NULL) {
+        opt->desc = find_desc_by_name(desc, opt->name);
+        if (!opt->desc) {
             error_set(errp, QERR_INVALID_PARAMETER, opt->name);
             return;
         }
 
-        opt->desc = &desc[i];
-
         qemu_opt_parse(opt, &local_err);
         if (error_is_set(&local_err)) {
             error_propagate(errp, local_err);
diff --git a/qemu-option.h b/qemu-option.h
index ca729862d5..002dd07ee5 100644
--- a/qemu-option.h
+++ b/qemu-option.h
@@ -126,6 +126,7 @@ int qemu_opt_set(QemuOpts *opts, const char *name, const char *value);
 void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
                       Error **errp);
 int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val);
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val);
 typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque);
 int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
                      int abort_on_failure);
@@ -133,6 +134,7 @@ int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
 QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id);
 QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
                            int fail_if_exists, Error **errp);
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list);
 void qemu_opts_reset(QemuOptsList *list);
 void qemu_opts_loc_restore(QemuOpts *opts);
 int qemu_opts_set(QemuOptsList *list, const char *id,
diff --git a/qemu-pixman.c b/qemu-pixman.c
index e46e1804f6..e7263fb2bf 100644
--- a/qemu-pixman.c
+++ b/qemu-pixman.c
@@ -21,7 +21,7 @@ int qemu_pixman_get_type(int rshift, int gshift, int bshift)
         if (rshift == 0) {
             type = PIXMAN_TYPE_ABGR;
         } else {
-#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 21, 8)
+#if PIXMAN_VERSION >= PIXMAN_VERSION_ENCODE(0, 16, 0)
             type = PIXMAN_TYPE_BGRA;
 #endif
         }
@@ -52,10 +52,10 @@ pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
 }
 
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
-                              int width, int y)
+                              int width, int x, int y)
 {
     pixman_image_composite(PIXMAN_OP_SRC, fb, NULL, linebuf,
-                           0, y, 0, 0, 0, 0, width, 1);
+                           x, y, 0, 0, 0, 0, width, 1);
 }
 
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
diff --git a/qemu-pixman.h b/qemu-pixman.h
index bee55eb7da..3c05c83a7c 100644
--- a/qemu-pixman.h
+++ b/qemu-pixman.h
@@ -31,7 +31,7 @@ pixman_format_code_t qemu_pixman_get_format(PixelFormat *pf);
 pixman_image_t *qemu_pixman_linebuf_create(pixman_format_code_t format,
                                            int width);
 void qemu_pixman_linebuf_fill(pixman_image_t *linebuf, pixman_image_t *fb,
-                              int width, int y);
+                              int width, int x, int y);
 pixman_image_t *qemu_pixman_mirror_create(pixman_format_code_t format,
                                           pixman_image_t *image);
 void qemu_pixman_image_unref(pixman_image_t *image);
diff --git a/qemu-sockets.c b/qemu-sockets.c
index d314cf1d1b..c52a40a411 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -579,7 +579,7 @@ int inet_listen(const char *str, char *ostr, int olen,
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_listen_opts(opts, port_offset, errp);
@@ -618,7 +618,7 @@ int inet_connect(const char *str, Error **errp)
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, NULL, NULL);
@@ -652,7 +652,7 @@ int inet_nonblocking_connect(const char *str,
 
     addr = inet_parse(str, errp);
     if (addr != NULL) {
-        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        opts = qemu_opts_create_nofail(&dummy_opts);
         inet_addr_to_opts(opts, addr);
         qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, callback, opaque);
@@ -795,7 +795,7 @@ int unix_listen(const char *str, char *ostr, int olen, Error **errp)
     char *path, *optstr;
     int sock, len;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
 
     optstr = strchr(str, ',');
     if (optstr) {
@@ -823,7 +823,7 @@ int unix_connect(const char *path, Error **errp)
     QemuOpts *opts;
     int sock;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     qemu_opt_set(opts, "path", path);
     sock = unix_connect_opts(opts, errp, NULL, NULL);
     qemu_opts_del(opts);
@@ -840,7 +840,7 @@ int unix_nonblocking_connect(const char *path,
 
     g_assert(callback != NULL);
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     qemu_opt_set(opts, "path", path);
     sock = unix_connect_opts(opts, errp, callback, opaque);
     qemu_opts_del(opts);
@@ -891,7 +891,7 @@ int socket_connect(SocketAddress *addr, Error **errp,
     QemuOpts *opts;
     int fd;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     switch (addr->kind) {
     case SOCKET_ADDRESS_KIND_INET:
         inet_addr_to_opts(opts, addr->inet);
@@ -922,7 +922,7 @@ int socket_listen(SocketAddress *addr, Error **errp)
     QemuOpts *opts;
     int fd;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    opts = qemu_opts_create_nofail(&dummy_opts);
     switch (addr->kind) {
     case SOCKET_ADDRESS_KIND_INET:
         inet_addr_to_opts(opts, addr->inet);
diff --git a/roms/SLOF b/roms/SLOF
-Subproject f21f7a3f46b557eb5923f899ce8b4401b3cc6d9
+Subproject 0ad10f26c94a86a0c9c3970e53f9a9f6a744055
diff --git a/roms/seabios b/roms/seabios
-Subproject b1c35f2b28cc0c94ebed8176ff61ac0e0b37779
+Subproject e8a76b0f225bba5ba9d63ab227e0a37b3beb105
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index d9d7f75b58..2430f70aca 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -494,16 +494,6 @@ void cpu_dump_state (CPUAlphaState *env, FILE *f, fprintf_function cpu_fprintf,
     cpu_fprintf(f, "\n");
 }
 
-void do_restore_state(CPUAlphaState *env, uintptr_t retaddr)
-{
-    if (retaddr) {
-        TranslationBlock *tb = tb_find_pc(retaddr);
-        if (tb) {
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 /* This should only be called from translate, via gen_excp.
    We expect that ENV->PC has already been updated.  */
 void QEMU_NORETURN helper_excp(CPUAlphaState *env, int excp, int error)
@@ -519,7 +509,9 @@ void QEMU_NORETURN dynamic_excp(CPUAlphaState *env, uintptr_t retaddr,
 {
     env->exception_index = excp;
     env->error_code = error;
-    do_restore_state(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     cpu_loop_exit(env);
 }
 
diff --git a/target-alpha/mem_helper.c b/target-alpha/mem_helper.c
index 617836cc6c..64b33f6518 100644
--- a/target-alpha/mem_helper.c
+++ b/target-alpha/mem_helper.c
@@ -94,7 +94,9 @@ static void do_unaligned_access(CPUAlphaState *env, target_ulong addr,
     uint64_t pc;
     uint32_t insn;
 
-    do_restore_state(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
 
     pc = env->pc;
     insn = cpu_ldl_code(env, pc);
@@ -143,7 +145,9 @@ void tlb_fill(CPUAlphaState *env, target_ulong addr, int is_write,
 
     ret = cpu_alpha_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
-        do_restore_state(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         /* Exception index and error code are already set */
         cpu_loop_exit(env);
     }
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6e3ab90e3b..1fcc975945 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -74,19 +74,13 @@ uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
 void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         raise_exception(env, env->exception_index);
     }
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index a7468d41c6..31db42494d 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -57,7 +57,6 @@
 void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     D_LOG("%s pc=%x tpc=%x ra=%p\n", __func__,
@@ -66,12 +65,7 @@ void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-
+            if (cpu_restore_state(env, retaddr)) {
 		/* Evaluate flags after retranslation.  */
                 helper_top_evaluate_flags(env);
             }
diff --git a/target-i386/helper.c b/target-i386/helper.c
index bf206cfa97..00341c5233 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1196,15 +1196,12 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank,
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access)
 {
-    TranslationBlock *tb;
-
     if (kvm_enabled()) {
         env->tpr_access_type = access;
 
         cpu_interrupt(env, CPU_INTERRUPT_TPR);
     } else {
-        tb = tb_find_pc(env->mem_io_pc);
-        cpu_restore_state(tb, env, env->mem_io_pc);
+        cpu_restore_state(env, env->mem_io_pc);
 
         apic_handle_tpr_access_report(env->apic_state, env->eip, access);
     }
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index 7f99c7cfe3..d0be77b1ed 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -135,19 +135,13 @@ void helper_boundl(CPUX86State *env, target_ulong a0, int v)
 void tlb_fill(CPUX86State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_x86_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         raise_exception_err(env, env->exception_index, env->error_code);
     }
diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c
index 7b91d8c31e..97b9625c1a 100644
--- a/target-lm32/op_helper.c
+++ b/target-lm32/op_helper.c
@@ -76,19 +76,13 @@ uint32_t helper_rcsr_jrx(CPULM32State *env)
 void tlb_fill(CPULM32State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_lm32_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index aa005048e1..b97ba5e28f 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -56,19 +56,13 @@ extern int semihosting_enabled;
 void tlb_fill(CPUM68KState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_m68k_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index 210296b30b..7593517094 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -44,19 +44,13 @@
 void tlb_fill(CPUMBState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_mb_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index f45d494b14..2972ae3f0a 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -38,7 +38,6 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
                                                         int error_code,
                                                         uintptr_t pc)
 {
-    TranslationBlock *tb;
 #if 1
     if (exception < 0x100)
         qemu_log("%s: %d %d\n", __func__, exception, error_code);
@@ -48,12 +47,7 @@ static inline void QEMU_NORETURN do_raise_exception_err(CPUMIPSState *env,
 
     if (pc) {
         /* now we have a real cpu fault */
-        tb = tb_find_pc(pc);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, pc);
-        }
+        cpu_restore_state(env, pc);
     }
 
     cpu_loop_exit(env);
diff --git a/target-openrisc/mmu_helper.c b/target-openrisc/mmu_helper.c
index 59ed371ae0..d2edebcb49 100644
--- a/target-openrisc/mmu_helper.c
+++ b/target-openrisc/mmu_helper.c
@@ -39,8 +39,6 @@
 void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
 {
-    TranslationBlock *tb;
-    unsigned long pc;
     int ret;
 
     ret = cpu_openrisc_handle_mmu_fault(env, addr, is_write, mmu_idx);
@@ -48,13 +46,7 @@ void tlb_fill(CPUOpenRISCState *env, target_ulong addr, int is_write,
     if (ret) {
         if (retaddr) {
             /* now we have a real cpu fault.  */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we
-                   have a virtual CPU fault.  */
-                cpu_restore_state(tb, env, pc);
-            }
+            cpu_restore_state(env, retaddr);
         }
         /* Raise Exception.  */
         cpu_loop_exit(env);
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 5f1dc8b7d5..742d4f8ae3 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -355,7 +355,7 @@ struct ppc6xx_tlb_t {
 
 typedef struct ppcemb_tlb_t ppcemb_tlb_t;
 struct ppcemb_tlb_t {
-    hwaddr RPN;
+    uint64_t RPN;
     target_ulong EPN;
     target_ulong PID;
     target_ulong size;
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 5b5f1bdd23..04c01445f9 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -275,19 +275,13 @@ STVE(stvewx, cpu_stl_data, bswap32, u32)
 void tlb_fill(CPUPPCState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_ppc_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
         if (likely(retaddr)) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (likely(tb)) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         helper_raise_exception_err(env, env->exception_index, env->error_code);
     }
diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c
index 6ebc22dd11..91b25e309d 100644
--- a/target-s390x/mem_helper.c
+++ b/target-s390x/mem_helper.c
@@ -47,19 +47,13 @@
 void tlb_fill(CPUS390XState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    TranslationBlock *tb;
     int ret;
 
     ret = cpu_s390x_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret != 0)) {
         if (likely(retaddr)) {
             /* now we have a real cpu fault */
-            tb = tb_find_pc(retaddr);
-            if (likely(tb)) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, retaddr);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 60ec4cbc4d..e8e87f5152 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -21,21 +21,6 @@
 #include "cpu.h"
 #include "helper.h"
 
-static inline void cpu_restore_state_from_retaddr(CPUSH4State *env,
-                                                  uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-
-    if (retaddr) {
-        tb = tb_find_pc(retaddr);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 #ifndef CONFIG_USER_ONLY
 #include "softmmu_exec.h"
 
@@ -61,7 +46,9 @@ void tlb_fill(CPUSH4State *env, target_ulong addr, int is_write, int mmu_idx,
     ret = cpu_sh4_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         /* now we have a real cpu fault */
-        cpu_restore_state_from_retaddr(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         cpu_loop_exit(env);
     }
 }
@@ -82,7 +69,9 @@ static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index,
                                                  uintptr_t retaddr)
 {
     env->exception_index = index;
-    cpu_restore_state_from_retaddr(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     cpu_loop_exit(env);
 }
 
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index 375f20a71e..013ecbd063 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -710,7 +710,6 @@ uint64_t cpu_tick_get_count(CPUTimer *timer);
 void cpu_tick_set_limit(CPUTimer *timer, uint64_t limit);
 trap_state* cpu_tsptr(CPUSPARCState* env);
 #endif
-void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr);
 
 #define TB_FLAG_FPU_ENABLED (1 << 4)
 #define TB_FLAG_AM_ENABLED (1 << 5)
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 556ac286eb..3c8e865eef 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -75,7 +75,7 @@ static target_ulong helper_udiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -114,7 +114,7 @@ static target_ulong helper_sdiv_common(CPUSPARCState *env, target_ulong a,
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -147,7 +147,7 @@ int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     } else if (b == -1) {
         /* Avoid overflow trap with i386 divide insn.  */
@@ -161,7 +161,7 @@ uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
 {
     if (b == 0) {
         /* Raise divide by zero trap.  */
-        cpu_restore_state2(env, GETPC());
+        cpu_restore_state(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
     return a / b;
@@ -193,7 +193,7 @@ target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state2(env, GETPC());
+    cpu_restore_state(env, GETPC());
     helper_raise_exception(env, TT_TOVF);
 }
 
@@ -222,6 +222,6 @@ target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
     return dst;
 
  tag_overflow:
-    cpu_restore_state2(env, GETPC());
+    cpu_restore_state(env, GETPC());
     helper_raise_exception(env, TT_TOVF);
 }
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index f3e08fd6e6..8d815e5038 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -2393,22 +2393,6 @@ void cpu_unassigned_access(CPUSPARCState *env, hwaddr addr,
 #endif
 #endif
 
-/* XXX: make it generic ? */
-void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
-{
-    TranslationBlock *tb;
-
-    if (retaddr) {
-        /* now we have a real cpu fault */
-        tb = tb_find_pc(retaddr);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, retaddr);
-        }
-    }
-}
-
 #if !defined(CONFIG_USER_ONLY)
 static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
                                               target_ulong addr, int is_write,
@@ -2418,7 +2402,9 @@ static void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env,
     printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
            "\n", addr, env->pc);
 #endif
-    cpu_restore_state2(env, retaddr);
+    if (retaddr) {
+        cpu_restore_state(env, retaddr);
+    }
     helper_raise_exception(env, TT_UNALIGNED);
 }
 
@@ -2433,7 +2419,9 @@ void tlb_fill(CPUSPARCState *env, target_ulong addr, int is_write, int mmu_idx,
 
     ret = cpu_sparc_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
-        cpu_restore_state2(env, retaddr);
+        if (retaddr) {
+            cpu_restore_state(env, retaddr);
+        }
         cpu_loop_exit(env);
     }
 }
diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c
index f474d1b59b..b8172ba682 100644
--- a/target-unicore32/op_helper.c
+++ b/target-unicore32/op_helper.c
@@ -256,20 +256,13 @@ uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 void tlb_fill(CPUUniCore32State *env, target_ulong addr, int is_write,
               int mmu_idx, uintptr_t retaddr)
 {
-    TranslationBlock *tb;
-    unsigned long pc;
     int ret;
 
     ret = uc32_cpu_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
             /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {/* the PC is inside the translated code.
-                        It means that we have a virtual CPU fault */
-                cpu_restore_state(tb, env, pc);
-            }
+            cpu_restore_state(env, retaddr);
         }
         cpu_loop_exit(env);
     }
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 200fb43c28..bf05575eb5 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -522,7 +522,8 @@ static int get_physical_addr_mmu(CPUXtensaState *env, bool update_tlb,
             INST_FETCH_PRIVILEGE_CAUSE;
     }
 
-    *access = mmu_attr_to_access(entry->attr);
+    *access = mmu_attr_to_access(entry->attr) &
+        ~(dtlb ? PAGE_EXEC : PAGE_READ | PAGE_WRITE);
     if (!is_access_granted(*access, is_write)) {
         return dtlb ?
             (is_write ?
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 0e0f21d1a2..84f0449f79 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -47,22 +47,12 @@ static void do_unaligned_access(CPUXtensaState *env,
 #define SHIFT 3
 #include "softmmu_template.h"
 
-static void do_restore_state(CPUXtensaState *env, uintptr_t pc)
-{
-    TranslationBlock *tb;
-
-    tb = tb_find_pc(pc);
-    if (tb) {
-        cpu_restore_state(tb, env, pc);
-    }
-}
-
 static void do_unaligned_access(CPUXtensaState *env,
         target_ulong addr, int is_write, int is_user, uintptr_t retaddr)
 {
     if (xtensa_option_enabled(env->config, XTENSA_OPTION_UNALIGNED_EXCEPTION) &&
             !xtensa_option_enabled(env->config, XTENSA_OPTION_HW_ALIGNMENT)) {
-        do_restore_state(env, retaddr);
+        cpu_restore_state(env, retaddr);
         HELPER(exception_cause_vaddr)(env,
                 env->pc, LOAD_STORE_ALIGNMENT_CAUSE, addr);
     }
@@ -86,7 +76,7 @@ void tlb_fill(CPUXtensaState *env,
                 paddr & TARGET_PAGE_MASK,
                 access, mmu_idx, page_size);
     } else {
-        do_restore_state(env, retaddr);
+        cpu_restore_state(env, retaddr);
         HELPER(exception_cause_vaddr)(env, env->pc, ret, vaddr);
     }
 }
diff --git a/tests/qemu-iotests/045 b/tests/qemu-iotests/045
new file mode 100755
index 0000000000..2b6f1af27a
--- /dev/null
+++ b/tests/qemu-iotests/045
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+#
+# Tests for fdsets.
+#
+# Copyright (C) 2012 IBM Corp.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+image0 = os.path.join(iotests.test_dir, 'image0')
+image1 = os.path.join(iotests.test_dir, 'image1')
+image2 = os.path.join(iotests.test_dir, 'image2')
+image3 = os.path.join(iotests.test_dir, 'image3')
+image4 = os.path.join(iotests.test_dir, 'image4')
+
+class TestFdSets(iotests.QMPTestCase):
+
+    def setUp(self):
+        self.vm = iotests.VM()
+        qemu_img('create', '-f', iotests.imgfmt, image0, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image1, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image2, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image3, '128K')
+        qemu_img('create', '-f', iotests.imgfmt, image4, '128K')
+        self.file0 = open(image0, 'r')
+        self.file1 = open(image1, 'w+')
+        self.file2 = open(image2, 'r')
+        self.file3 = open(image3, 'r')
+        self.file4 = open(image4, 'r')
+        self.vm.add_fd(self.file0.fileno(), 1, 'image0:r')
+        self.vm.add_fd(self.file1.fileno(), 1, 'image1:w+')
+        self.vm.add_fd(self.file2.fileno(), 0, 'image2:r')
+        self.vm.add_fd(self.file3.fileno(), 2, 'image3:r')
+        self.vm.add_fd(self.file4.fileno(), 2, 'image4:r')
+        self.vm.add_drive("/dev/fdset/1")
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        self.file0.close()
+        self.file1.close()
+        self.file2.close()
+        self.file3.close()
+        self.file4.close()
+        os.remove(image0)
+        os.remove(image1)
+        os.remove(image2)
+        os.remove(image3)
+        os.remove(image4)
+
+    def test_query_fdset(self):
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 2)
+        self.assert_qmp(result, 'return[1]/fdset-id', 1)
+        self.assert_qmp(result, 'return[2]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image3:r')
+        self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image4:r')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fdset(self):
+        result = self.vm.qmp('remove-fd', fdset_id=2)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 1)
+        self.assert_qmp(result, 'return[1]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[0]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fd(self):
+        result = self.vm.qmp('query-fdsets')
+        fd_image3 = result['return'][0]['fds'][0]['fd']
+        result = self.vm.qmp('remove-fd', fdset_id=2, fd=fd_image3)
+        self.assert_qmp(result, 'return', {})
+        result = self.vm.qmp('query-fdsets')
+        self.assert_qmp(result, 'return[0]/fdset-id', 2)
+        self.assert_qmp(result, 'return[1]/fdset-id', 1)
+        self.assert_qmp(result, 'return[2]/fdset-id', 0)
+        self.assert_qmp(result, 'return[0]/fds[0]/opaque', 'image4:r')
+        self.assert_qmp(result, 'return[1]/fds[0]/opaque', 'image0:r')
+        self.assert_qmp(result, 'return[1]/fds[1]/opaque', 'image1:w+')
+        self.assert_qmp(result, 'return[2]/fds[0]/opaque', 'image2:r')
+        self.vm.shutdown()
+
+    def test_remove_fd_invalid_fdset(self):
+        result = self.vm.qmp('query-fdsets')
+        fd_image3 = result['return'][0]['fds'][0]['fd']
+        result = self.vm.qmp('remove-fd', fdset_id=3, fd=fd_image3)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+            'File descriptor named \'fdset-id:3, fd:%d\' not found' % fd_image3)
+        self.vm.shutdown()
+
+    def test_remove_fd_invalid_fd(self):
+        result = self.vm.qmp('query-fdsets')
+        result = self.vm.qmp('remove-fd', fdset_id=2, fd=999)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+            'File descriptor named \'fdset-id:2, fd:999\' not found')
+        self.vm.shutdown()
+
+    def test_add_fd_invalid_fd(self):
+        result = self.vm.qmp('add-fd', fdset_id=2)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc',
+                'No file descriptor supplied via SCM_RIGHTS')
+        self.vm.shutdown()
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['raw'])
diff --git a/tests/qemu-iotests/045.out b/tests/qemu-iotests/045.out
new file mode 100644
index 0000000000..3f8a935a08
--- /dev/null
+++ b/tests/qemu-iotests/045.out
@@ -0,0 +1,5 @@
+......
+----------------------------------------------------------------------
+Ran 6 tests
+
+OK
diff --git a/tests/qemu-iotests/046 b/tests/qemu-iotests/046
new file mode 100755
index 0000000000..e0176f42df
--- /dev/null
+++ b/tests/qemu-iotests/046
@@ -0,0 +1,215 @@
+#!/bin/bash
+#
+# Test concurrent cluster allocations
+#
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto generic
+_supported_os Linux
+
+CLUSTER_SIZE=64k
+size=128M
+
+echo
+echo "== creating backing file for COW tests =="
+
+_make_test_img $size
+
+function backing_io()
+{
+    local offset=$1
+    local sectors=$2
+    local op=$3
+    local pattern=0
+    local cur_sec=0
+
+    for i in $(seq 0 $((sectors - 1))); do
+        cur_sec=$((offset / 65536 + i))
+        pattern=$(( ( (cur_sec % 128) + (cur_sec / 128)) % 128 ))
+
+        echo "$op -P $pattern $((cur_sec * 64))k 64k"
+    done
+}
+
+backing_io 0 16 write | $QEMU_IO $TEST_IMG | _filter_qemu_io
+
+mv $TEST_IMG $TEST_IMG.base
+
+_make_test_img -b $TEST_IMG.base 6G
+
+echo
+echo "== Some concurrent requests touching the same cluster =="
+
+function overlay_io()
+{
+# Allocate middle of cluster 1, then write to somewhere before and after it
+cat  <<EOF
+break write_aio A
+aio_write -P 10 0x18000 0x2000
+wait_break A
+
+aio_write -P 11 0x12000 0x2000
+aio_write -P 12 0x1c000 0x2000
+
+resume A
+aio_flush
+EOF
+
+# Sequential write case: Alloc middle of cluster 2, then write overlapping
+# to next cluster
+cat  <<EOF
+break write_aio A
+aio_write -P 20 0x28000 0x2000
+wait_break A
+aio_write -P 21 0x2a000 0x10000
+resume A
+aio_flush
+EOF
+
+# The same with a gap between both requests
+cat  <<EOF
+break write_aio A
+aio_write -P 40 0x48000 0x2000
+wait_break A
+aio_write -P 41 0x4c000 0x10000
+resume A
+aio_flush
+EOF
+
+# Sequential write, but the next cluster is already allocated
+cat  <<EOF
+write -P 70 0x76000 0x8000
+aio_flush
+break write_aio A
+aio_write -P 60 0x66000 0x2000
+wait_break A
+aio_write -P 61 0x6a000 0xe000
+resume A
+aio_flush
+EOF
+
+# Sequential write, but the next cluster is already allocated
+# and physically in the right position
+cat  <<EOF
+write -P 89 0x80000 0x1000
+write -P 90 0x96000 0x8000
+aio_flush
+discard 0x80000 0x10000
+aio_flush
+break write_aio A
+aio_write -P 80 0x86000 0x2000
+wait_break A
+aio_write -P 81 0x8a000 0xe000
+resume A
+aio_flush
+EOF
+
+# Sequential write, and the next cluster is compressed
+cat  <<EOF
+write    -P 109 0xa0000 0x1000
+write -c -P 110 0xb0000 0x10000
+aio_flush
+discard 0xa0000 0x10000
+aio_flush
+break write_aio A
+aio_write -P 100 0xa6000 0x2000
+wait_break A
+aio_write -P 101 0xaa000 0xe000
+resume A
+aio_flush
+EOF
+}
+
+overlay_io | $QEMU_IO blkdebug::$TEST_IMG | _filter_qemu_io |\
+	sed -e 's/bytes at offset [0-9]*/bytes at offset XXX/g'
+
+echo
+echo "== Verify image content =="
+
+function verify_io()
+{
+    echo read -P 0 0 0x10000
+
+    echo read -P 1  0x10000 0x2000
+    echo read -P 11 0x12000 0x2000
+    echo read -P 1  0x14000 0x4000
+    echo read -P 10 0x18000 0x2000
+    echo read -P 1  0x1a000 0x2000
+    echo read -P 12 0x1c000 0x2000
+    echo read -P 1  0x1e000 0x2000
+
+    echo read -P 2  0x20000 0x8000
+    echo read -P 20 0x28000 0x2000
+    echo read -P 21 0x2a000 0x10000
+    echo read -P 3  0x3a000 0x6000
+
+    echo read -P 4  0x40000 0x8000
+    echo read -P 40 0x48000 0x2000
+    echo read -P 4  0x4a000 0x2000
+    echo read -P 41 0x4c000 0x10000
+    echo read -P 5  0x5c000 0x4000
+
+    echo read -P 6  0x60000 0x6000
+    echo read -P 60 0x66000 0x2000
+    echo read -P 6  0x68000 0x2000
+    echo read -P 61 0x6a000 0xe000
+    echo read -P 70 0x78000 0x6000
+    echo read -P 7  0x7e000 0x2000
+
+    echo read -P 8  0x80000 0x6000
+    echo read -P 80 0x86000 0x2000
+    echo read -P 8  0x88000 0x2000
+    echo read -P 81 0x8a000 0xe000
+    echo read -P 90 0x98000 0x6000
+    echo read -P 9  0x9e000 0x2000
+
+    echo read -P 10  0xa0000 0x6000
+    echo read -P 100 0xa6000 0x2000
+    echo read -P 10  0xa8000 0x2000
+    echo read -P 101 0xaa000 0xe000
+    echo read -P 110 0xb8000 0x8000
+}
+
+verify_io | $QEMU_IO $TEST_IMG | _filter_qemu_io
+
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/046.out b/tests/qemu-iotests/046.out
new file mode 100644
index 0000000000..565360fe60
--- /dev/null
+++ b/tests/qemu-iotests/046.out
@@ -0,0 +1,163 @@
+QA output created by 046
+
+== creating backing file for COW tests ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+qemu-io> wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 65536
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 131072
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 196608
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 327680
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 393216
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 458752
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 524288
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 589824
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 655360
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 720896
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 786432
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 851968
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 917504
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset 983040
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=6442450944 backing_file='TEST_DIR/t.IMGFMT.base' 
+
+== Some concurrent requests touching the same cluster ==
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 32768/32768 bytes at offset XXX
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 4096/4096 bytes at offset XXX
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 32768/32768 bytes at offset XXX
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 4096/4096 bytes at offset XXX
+4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> wrote 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> discard 65536/65536 bytes at offset XXX
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> qemu-io> qemu-io> blkdebug: Suspended request 'A'
+qemu-io> qemu-io> qemu-io> blkdebug: Resuming request 'A'
+qemu-io> wrote 8192/8192 bytes at offset XXX
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 57344/57344 bytes at offset XXX
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> 
+== Verify image content ==
+qemu-io> read 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 65536
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 73728
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 16384/16384 bytes at offset 81920
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 98304
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 106496
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 114688
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 122880
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 131072
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 163840
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 65536/65536 bytes at offset 172032
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 237568
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 262144
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 294912
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 303104
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 65536/65536 bytes at offset 311296
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 16384/16384 bytes at offset 376832
+16 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 393216
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 417792
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 425984
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 434176
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 491520
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 516096
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 524288
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 548864
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 557056
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 565248
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 622592
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 647168
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 24576/24576 bytes at offset 655360
+24 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 679936
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 8192/8192 bytes at offset 688128
+8 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 57344/57344 bytes at offset 696320
+56 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> read 32768/32768 bytes at offset 753664
+32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+qemu-io> No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index a4a9044f24..a0307de06b 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -51,3 +51,5 @@
 042 rw auto quick
 043 rw auto backing
 044 rw auto
+045 rw auto
+046 rw auto aio
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 0be5c7e13f..569ca3d804 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -79,6 +79,18 @@ class VM(object):
         self._num_drives += 1
         return self
 
+    def add_fd(self, fd, fdset, opaque, opts=''):
+        '''Pass a file descriptor to the VM'''
+        options = ['fd=%d' % fd,
+                   'set=%d' % fdset,
+                   'opaque=%s' % opaque]
+        if opts:
+            options.append(opts)
+
+        self._args.append('-add-fd')
+        self._args.append(','.join(options))
+        return self
+
     def launch(self):
         '''Launch the VM and establish a QMP connection'''
         devnull = open('/dev/null', 'rb')
diff --git a/tests/test-aio.c b/tests/test-aio.c
index f53c908707..a8a4f0c6a5 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -15,6 +15,14 @@
 
 AioContext *ctx;
 
+/* Wait until there are no more BHs or AIO requests */
+static void wait_for_aio(void)
+{
+    while (aio_poll(ctx, true)) {
+        /* Do nothing */
+    }
+}
+
 /* Simple callbacks for testing.  */
 
 typedef struct {
@@ -78,14 +86,6 @@ static void test_notify(void)
     g_assert(!aio_poll(ctx, false));
 }
 
-static void test_flush(void)
-{
-    g_assert(!aio_poll(ctx, false));
-    aio_notify(ctx);
-    aio_flush(ctx);
-    g_assert(!aio_poll(ctx, false));
-}
-
 static void test_bh_schedule(void)
 {
     BHTestData data = { .n = 0 };
@@ -116,7 +116,7 @@ static void test_bh_schedule10(void)
     g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 2);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 10);
 
     g_assert(!aio_poll(ctx, false));
@@ -164,7 +164,7 @@ static void test_bh_delete_from_cb(void)
     qemu_bh_schedule(data1.bh);
     g_assert_cmpint(data1.n, ==, 0);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert(data1.bh == NULL);
 
@@ -200,7 +200,7 @@ static void test_bh_delete_from_cb_many(void)
     g_assert_cmpint(data4.n, ==, 1);
     g_assert(data1.bh == NULL);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert_cmpint(data2.n, ==, data2.max);
     g_assert_cmpint(data3.n, ==, data3.max);
@@ -219,7 +219,7 @@ static void test_bh_flush(void)
     qemu_bh_schedule(data.bh);
     g_assert_cmpint(data.n, ==, 0);
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 1);
 
     g_assert(!aio_poll(ctx, false));
@@ -281,7 +281,7 @@ static void test_flush_event_notifier(void)
     g_assert_cmpint(data.active, ==, 9);
     g_assert(aio_poll(ctx, false));
 
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 10);
     g_assert_cmpint(data.active, ==, 0);
     g_assert(!aio_poll(ctx, false));
@@ -325,7 +325,7 @@ static void test_wait_event_notifier_noflush(void)
     g_assert_cmpint(data.n, ==, 2);
 
     event_notifier_set(&dummy.e);
-    aio_flush(ctx);
+    wait_for_aio();
     g_assert_cmpint(data.n, ==, 2);
     g_assert_cmpint(dummy.n, ==, 1);
     g_assert_cmpint(dummy.active, ==, 0);
@@ -346,7 +346,7 @@ static void test_wait_event_notifier_noflush(void)
  * - sometimes both the AioContext and the glib main loop wake
  *   themselves up.  Hence, some "g_assert(!aio_poll(ctx, false));"
  *   are replaced by "while (g_main_context_iteration(NULL, false));".
- * - there is no exact replacement for aio_flush's blocking wait.
+ * - there is no exact replacement for a blocking wait.
  *   "while (g_main_context_iteration(NULL, true)" seems to work,
  *   but it is not documented _why_ it works.  For these tests a
  *   non-blocking loop like "while (g_main_context_iteration(NULL, false)"
@@ -637,7 +637,6 @@ int main(int argc, char **argv)
 
     g_test_init(&argc, &argv, NULL);
     g_test_add_func("/aio/notify",                  test_notify);
-    g_test_add_func("/aio/flush",                   test_flush);
     g_test_add_func("/aio/bh/schedule",             test_bh_schedule);
     g_test_add_func("/aio/bh/schedule10",           test_bh_schedule10);
     g_test_add_func("/aio/bh/cancel",               test_bh_cancel);
diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c
index fea0445fb4..ea8e676b0c 100644
--- a/tests/test-thread-pool.c
+++ b/tests/test-thread-pool.c
@@ -47,11 +47,19 @@ static void qemu_aio_wait_nonblocking(void)
     qemu_aio_wait();
 }
 
+/* Wait until all aio and bh activity has finished */
+static void qemu_aio_wait_all(void)
+{
+    while (qemu_aio_wait()) {
+        /* Do nothing */
+    }
+}
+
 static void test_submit(void)
 {
     WorkerTestData data = { .n = 0 };
     thread_pool_submit(worker_cb, &data);
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(data.n, ==, 1);
 }
 
@@ -63,7 +71,7 @@ static void test_submit_aio(void)
     /* The callbacks are not called until after the first wait.  */
     active = 1;
     g_assert_cmpint(data.ret, ==, -EINPROGRESS);
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(active, ==, 0);
     g_assert_cmpint(data.n, ==, 1);
     g_assert_cmpint(data.ret, ==, 0);
@@ -84,7 +92,7 @@ static void co_test_cb(void *opaque)
     data->ret = 0;
     active--;
 
-    /* The test continues in test_submit_co, after qemu_aio_flush... */
+    /* The test continues in test_submit_co, after qemu_aio_wait_all... */
 }
 
 static void test_submit_co(void)
@@ -99,9 +107,9 @@ static void test_submit_co(void)
     g_assert_cmpint(active, ==, 1);
     g_assert_cmpint(data.ret, ==, -EINPROGRESS);
 
-    /* qemu_aio_flush will execute the rest of the coroutine.  */
+    /* qemu_aio_wait_all will execute the rest of the coroutine.  */
 
-    qemu_aio_flush();
+    qemu_aio_wait_all();
 
     /* Back here after the coroutine has finished.  */
 
@@ -184,7 +192,7 @@ static void test_cancel(void)
     }
 
     /* Finish execution and execute any remaining callbacks.  */
-    qemu_aio_flush();
+    qemu_aio_wait_all();
     g_assert_cmpint(active, ==, 0);
     for (i = 0; i < 100; i++) {
         if (data[i].n == 3) {
diff --git a/trace-events b/trace-events
index 6c6cbf10fd..6cb450a993 100644
--- a/trace-events
+++ b/trace-events
@@ -1022,3 +1022,16 @@ spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, requested %
 spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
 spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@%"PRIx64"<=%"PRIx64" IRQ %u"
 spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+
+# hw/xics.c
+xics_icp_check_ipi(int server, uint8_t mfrr) "CPU %d can take IPI mfrr=%#x"
+xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr) "icp_accept: XIRR %#"PRIx32"->%#"PRIx32
+xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr) "icp_eoi: server %d given XIRR %#"PRIx32" new XIRR %#"PRIx32
+xics_icp_irq(int server, int nr, uint8_t priority) "cpu %d trying to deliver irq %#"PRIx32" priority %#x"
+xics_icp_raise(uint32_t xirr, uint8_t pending_priority) "raising IRQ new XIRR=%#x new pending priority=%#x"
+xics_set_irq_msi(int srcno, int nr) "set_irq_msi: srcno %d [irq %#x]"
+xics_masked_pending(void) "set_irq_msi: masked pending"
+xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
+xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
+xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
+xics_ics_eoi(int nr) "ics_eoi: irq %#x"
diff --git a/translate-all.c b/translate-all.c
index f22e3eedd2..164870a68c 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -16,6 +16,12 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -24,15 +30,120 @@
 
 #include "config.h"
 
+#include "qemu-common.h"
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
 #include "disas.h"
 #include "tcg.h"
 #include "qemu-timer.h"
+#include "memory.h"
+#include "exec-memory.h"
+#if defined(CONFIG_USER_ONLY)
+#include "qemu.h"
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+#include <sys/param.h>
+#if __FreeBSD_version >= 700104
+#define HAVE_KINFO_GETVMMAP
+#define sigqueue sigqueue_freebsd  /* avoid redefinition */
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <machine/profile.h>
+#define _KERNEL
+#include <sys/user.h>
+#undef _KERNEL
+#undef sigqueue
+#include <libutil.h>
+#endif
+#endif
+#endif
+
+#include "cputlb.h"
+#include "translate-all.h"
+
+//#define DEBUG_TB_INVALIDATE
+//#define DEBUG_FLUSH
+/* make various TB consistency checks */
+//#define DEBUG_TB_CHECK
+
+#if !defined(CONFIG_USER_ONLY)
+/* TB consistency checks only implemented for usermode emulation.  */
+#undef DEBUG_TB_CHECK
+#endif
+
+#define SMC_BITMAP_USE_THRESHOLD 10
+
+/* Code generation and translation blocks */
+static TranslationBlock *tbs;
+static int code_gen_max_blocks;
+TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+static int nb_tbs;
+/* any access to the tbs or the page table must use this lock */
+spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
+
+uint8_t *code_gen_prologue;
+static uint8_t *code_gen_buffer;
+static size_t code_gen_buffer_size;
+/* threshold to flush the translated code buffer */
+static size_t code_gen_buffer_max_size;
+static uint8_t *code_gen_ptr;
+
+typedef struct PageDesc {
+    /* list of TBs intersecting this ram page */
+    TranslationBlock *first_tb;
+    /* in order to optimize self modifying code, we count the number
+       of lookups we do to a given page to use a bitmap */
+    unsigned int code_write_count;
+    uint8_t *code_bitmap;
+#if defined(CONFIG_USER_ONLY)
+    unsigned long flags;
+#endif
+} PageDesc;
+
+/* In system mode we want L1_MAP to be based on ram offsets,
+   while in user mode we want it to be based on virtual addresses.  */
+#if !defined(CONFIG_USER_ONLY)
+#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
+# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
+#endif
+#else
+# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
+#endif
+
+/* The bits remaining after N lower levels of page tables.  */
+#define V_L1_BITS_REM \
+    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+
+#if V_L1_BITS_REM < 4
+#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
+#else
+#define V_L1_BITS  V_L1_BITS_REM
+#endif
+
+#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)
+
+#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
+
+uintptr_t qemu_real_host_page_size;
+uintptr_t qemu_host_page_size;
+uintptr_t qemu_host_page_mask;
+
+/* This is a multi-level map on the virtual address space.
+   The bottom level has pointers to PageDesc.  */
+static void *l1_map[V_L1_SIZE];
+
+/* statistics */
+static int tb_flush_count;
+static int tb_phys_invalidate_count;
 
 /* code generation context */
 TCGContext tcg_ctx;
 
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2);
+static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
+
 void cpu_gen_init(void)
 {
     tcg_context_init(&tcg_ctx); 
@@ -101,8 +212,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
 
 /* The cpu state corresponding to 'searched_pc' is restored.
  */
-int cpu_restore_state(TranslationBlock *tb,
-                      CPUArchState *env, uintptr_t searched_pc)
+static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
+                                     uintptr_t searched_pc)
 {
     TCGContext *s = &tcg_ctx;
     int j;
@@ -155,3 +266,1624 @@ int cpu_restore_state(TranslationBlock *tb,
 #endif
     return 0;
 }
+
+bool cpu_restore_state(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(retaddr);
+    if (tb) {
+        cpu_restore_state_from_tb(tb, env, retaddr);
+        return true;
+    }
+    return false;
+}
+
+#ifdef _WIN32
+static inline void map_exec(void *addr, long size)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size,
+                   PAGE_EXECUTE_READWRITE, &old_protect);
+}
+#else
+static inline void map_exec(void *addr, long size)
+{
+    unsigned long start, end, page_size;
+
+    page_size = getpagesize();
+    start = (unsigned long)addr;
+    start &= ~(page_size - 1);
+
+    end = (unsigned long)addr + size;
+    end += page_size - 1;
+    end &= ~(page_size - 1);
+
+    mprotect((void *)start, end - start,
+             PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+#endif
+
+static void page_init(void)
+{
+    /* NOTE: we can always suppose that qemu_host_page_size >=
+       TARGET_PAGE_SIZE */
+#ifdef _WIN32
+    {
+        SYSTEM_INFO system_info;
+
+        GetSystemInfo(&system_info);
+        qemu_real_host_page_size = system_info.dwPageSize;
+    }
+#else
+    qemu_real_host_page_size = getpagesize();
+#endif
+    if (qemu_host_page_size == 0) {
+        qemu_host_page_size = qemu_real_host_page_size;
+    }
+    if (qemu_host_page_size < TARGET_PAGE_SIZE) {
+        qemu_host_page_size = TARGET_PAGE_SIZE;
+    }
+    qemu_host_page_mask = ~(qemu_host_page_size - 1);
+
+#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
+    {
+#ifdef HAVE_KINFO_GETVMMAP
+        struct kinfo_vmentry *freep;
+        int i, cnt;
+
+        freep = kinfo_getvmmap(getpid(), &cnt);
+        if (freep) {
+            mmap_lock();
+            for (i = 0; i < cnt; i++) {
+                unsigned long startaddr, endaddr;
+
+                startaddr = freep[i].kve_start;
+                endaddr = freep[i].kve_end;
+                if (h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                    } else {
+#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
+                        endaddr = ~0ul;
+                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+#endif
+                    }
+                }
+            }
+            free(freep);
+            mmap_unlock();
+        }
+#else
+        FILE *f;
+
+        last_brk = (unsigned long)sbrk(0);
+
+        f = fopen("/compat/linux/proc/self/maps", "r");
+        if (f) {
+            mmap_lock();
+
+            do {
+                unsigned long startaddr, endaddr;
+                int n;
+
+                n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
+
+                if (n == 2 && h2g_valid(startaddr)) {
+                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
+
+                    if (h2g_valid(endaddr)) {
+                        endaddr = h2g(endaddr);
+                    } else {
+                        endaddr = ~0ul;
+                    }
+                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
+                }
+            } while (!feof(f));
+
+            fclose(f);
+            mmap_unlock();
+        }
+#endif
+    }
+#endif
+}
+
+static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
+{
+    PageDesc *pd;
+    void **lp;
+    int i;
+
+#if defined(CONFIG_USER_ONLY)
+    /* We can't use g_malloc because it may recurse into a locked mutex. */
+# define ALLOC(P, SIZE)                                 \
+    do {                                                \
+        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
+                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
+    } while (0)
+#else
+# define ALLOC(P, SIZE) \
+    do { P = g_malloc0(SIZE); } while (0)
+#endif
+
+    /* Level 1.  Always allocated.  */
+    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+
+    /* Level 2..N-1.  */
+    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+        void **p = *lp;
+
+        if (p == NULL) {
+            if (!alloc) {
+                return NULL;
+            }
+            ALLOC(p, sizeof(void *) * L2_SIZE);
+            *lp = p;
+        }
+
+        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+    }
+
+    pd = *lp;
+    if (pd == NULL) {
+        if (!alloc) {
+            return NULL;
+        }
+        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+        *lp = pd;
+    }
+
+#undef ALLOC
+
+    return pd + (index & (L2_SIZE - 1));
+}
+
+static inline PageDesc *page_find(tb_page_addr_t index)
+{
+    return page_find_alloc(index, 0);
+}
+
+#if !defined(CONFIG_USER_ONLY)
+#define mmap_lock() do { } while (0)
+#define mmap_unlock() do { } while (0)
+#endif
+
+#if defined(CONFIG_USER_ONLY)
+/* Currently it is not recommended to allocate big chunks of data in
+   user mode.  This will change once a dedicated libc is used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
+#define USE_STATIC_CODE_GEN_BUFFER
+#endif
+
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
+#endif
+
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
+{
+    /* Size the buffer: 0 means "pick a default"; the result is clamped below.  */
+    if (tb_size == 0) {
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE; /* same size as static_code_gen_buffer */
+#else
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4); /* a quarter of ram_size */
+#endif
+    }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
+    code_gen_buffer_size = tb_size; /* side effect: the global is updated as well */
+    return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, code_gen_buffer_size); /* presumably marks the range executable -- helper not visible here */
+    return static_code_gen_buffer; /* never NULL in this variant */
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0; /* address passed to mmap(); stays 0 unless an arch case below sets it */
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024; /* shrinks the global, not just a local */
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0); /* MAP_FIXED is not set */
+    return buf == MAP_FAILED ? NULL : buf; /* report failure as NULL, as the caller expects */
+}
+#else
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size); /* presumably marks the heap block executable -- confirm */
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size); /* clamped size; 0 selects the default */
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1); /* allocation failure is fatal */
+    }
+
+    qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE); /* return value ignored */
+
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024; /* last 1024 bytes */
+    code_gen_buffer_size -= 1024; /* the prologue area is no longer available for TBs */
+
+    code_gen_buffer_max_size = code_gen_buffer_size -
+        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); /* tb_alloc() refuses new TBs past this mark */
+    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock)); /* TB descriptor array */
+}
+
+/* Must be called before using the QEMU cpus. 'tb_size' is the requested
+   size (in bytes) of the translation buffer; zero selects the default
+   size.  The value is clamped by size_code_gen_buffer(). */
+void tcg_exec_init(unsigned long tb_size)
+{
+    cpu_gen_init();
+    code_gen_alloc(tb_size); /* exits the process if the buffer cannot be allocated */
+    code_gen_ptr = code_gen_buffer; /* code generation starts at the buffer base */
+    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
+    page_init();
+#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
+    /* There's no guest base to take into account, so go ahead and
+       initialize the prologue now.  */
+    tcg_prologue_init(&tcg_ctx);
+#endif
+}
+
+bool tcg_enabled(void)
+{
+    return code_gen_buffer != NULL; /* the buffer is assigned by code_gen_alloc() */
+}
+
+/* Allocate a new translation block. Returns NULL (the caller must flush
+   and retry) if too many translation blocks or too much generated code. */
+static TranslationBlock *tb_alloc(target_ulong pc)
+{
+    TranslationBlock *tb;
+
+    if (nb_tbs >= code_gen_max_blocks ||
+        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
+        return NULL; /* no flush happens here; see tb_gen_code() */
+    }
+    tb = &tbs[nb_tbs++]; /* descriptors are handed out sequentially from tbs[] */
+    tb->pc = pc;
+    tb->cflags = 0;
+    return tb;
+}
+
+void tb_free(TranslationBlock *tb)
+{
+    /* In practice this is mostly used for single-use temporary TBs.
+       Ignore the hard cases and just back up if this TB happens to
+       be the last one generated.  */
+    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+        code_gen_ptr = tb->tc_ptr; /* reclaim the code space used by this TB */
+        nb_tbs--; /* and release its descriptor slot */
+    }
+}
+
+static inline void invalidate_page_bitmap(PageDesc *p)
+{
+    if (p->code_bitmap) {
+        g_free(p->code_bitmap); /* drop the per-page code bitmap, if one was built */
+        p->code_bitmap = NULL;
+    }
+    p->code_write_count = 0; /* restart the write counter that triggers bitmap creation */
+}
+
+/* Recursively clear 'first_tb' and the code bitmap of all PageDescs under *lp. */
+static void page_flush_tb_1(int level, void **lp)
+{
+    int i;
+
+    if (*lp == NULL) {
+        return; /* nothing was ever allocated under this entry */
+    }
+    if (level == 0) {
+        PageDesc *pd = *lp; /* level 0: *lp is an array of L2_SIZE PageDescs */
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            pd[i].first_tb = NULL;
+            invalidate_page_bitmap(pd + i);
+        }
+    } else {
+        void **pp = *lp; /* higher level: *lp is a table of L2_SIZE pointers */
+
+        for (i = 0; i < L2_SIZE; ++i) {
+            page_flush_tb_1(level - 1, pp + i);
+        }
+    }
+}
+
+static void page_flush_tb(void)
+{
+    int i;
+
+    for (i = 0; i < V_L1_SIZE; i++) { /* walk every top-level l1_map slot */
+        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+    }
+}
+
+/* flush all the translation blocks; 'env1' is only used for cpu_abort() */
+/* XXX: tb_flush is currently not thread safe */
+void tb_flush(CPUArchState *env1)
+{
+    CPUArchState *env;
+
+#if defined(DEBUG_FLUSH)
+    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
+           (unsigned long)(code_gen_ptr - code_gen_buffer),
+           nb_tbs, nb_tbs > 0 ?
+           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+#endif
+    if ((unsigned long)(code_gen_ptr - code_gen_buffer)
+        > code_gen_buffer_size) {
+        cpu_abort(env1, "Internal error: code buffer overflow\n");
+    }
+    nb_tbs = 0; /* every TB descriptor becomes free again */
+
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); /* per-CPU cache */
+    }
+
+    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *)); /* global hash */
+    page_flush_tb(); /* detach TB lists and bitmaps from all PageDescs */
+
+    code_gen_ptr = code_gen_buffer; /* restart code generation at the buffer base */
+    /* XXX: flush processor icache at this point if cache flush is
+       expensive */
+    tb_flush_count++;
+}
+
+#ifdef DEBUG_TB_CHECK
+
+static void tb_invalidate_check(target_ulong address)
+{
+    TranslationBlock *tb;
+    int i;
+
+    address &= TARGET_PAGE_MASK; /* keep only the page part of the address */
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
+                  address >= tb->pc + tb->size)) { /* a hashed TB still overlaps the page */
+                printf("ERROR invalidate: address=" TARGET_FMT_lx
+                       " PC=%08lx size=%04x\n",
+                       address, (long)tb->pc, tb->size);
+            }
+        }
+    }
+}
+
+/* debug check: report every hashed TB whose first or last byte is on a PAGE_WRITE page */
+static void tb_page_check(void)
+{
+    TranslationBlock *tb;
+    int i, flags1, flags2;
+
+    for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
+        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+            flags1 = page_get_flags(tb->pc); /* page holding the first byte */
+            flags2 = page_get_flags(tb->pc + tb->size - 1); /* page holding the last byte */
+            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
+                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
+                       (long)tb->pc, tb->size, flags1, flags2);
+            }
+        }
+    }
+}
+
+#endif
+
+/* unlink 'tb' from the list starting at *ptb; 'next_offset' locates the link field */
+static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
+                             int next_offset)
+{
+    TranslationBlock *tb1;
+
+    for (;;) { /* no end-of-list test: 'tb' must be present in the list */
+        tb1 = *ptb;
+        if (tb1 == tb) {
+            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset); /* bypass tb */
+            break;
+        }
+        ptb = (TranslationBlock **)((char *)tb1 + next_offset); /* advance to tb1's link */
+    }
+}
+
+static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
+{
+    TranslationBlock *tb1;
+    unsigned int n1;
+
+    for (;;) { /* no end-of-list test: 'tb' must be present in the page list */
+        tb1 = *ptb;
+        n1 = (uintptr_t)tb1 & 3; /* low bits of the link select which page_next[] slot to follow */
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3); /* strip the tag to get the real pointer */
+        if (tb1 == tb) {
+            *ptb = tb1->page_next[n1]; /* bypass tb in this page's list */
+            break;
+        }
+        ptb = &tb1->page_next[n1];
+    }
+}
+
+static inline void tb_jmp_remove(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, **ptb;
+    unsigned int n1;
+
+    ptb = &tb->jmp_next[n];
+    tb1 = *ptb;
+    if (tb1) { /* NULL means jump slot n of tb is not on any list */
+        /* find tb(n) in circular list */
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3; /* tag carried in the low bits of the link */
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break; /* *ptb is the link that points at tb(n) */
+            }
+            if (n1 == 2) {
+                ptb = &tb1->jmp_first; /* tag 2: continue through tb1's jmp_first */
+            } else {
+                ptb = &tb1->jmp_next[n1];
+            }
+        }
+        /* now we can suppress tb(n) from the list */
+        *ptb = tb->jmp_next[n];
+
+        tb->jmp_next[n] = NULL;
+    }
+}
+
+/* reset the jump entry 'n' of a TB so that it is not chained to
+   another TB: the target becomes tc_ptr + tb_next_offset[n] in the TB itself */
+static inline void tb_reset_jump(TranslationBlock *tb, int n)
+{
+    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
+}
+
+void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
+{
+    CPUArchState *env;
+    PageDesc *p;
+    unsigned int h, n1;
+    tb_page_addr_t phys_pc;
+    TranslationBlock *tb1, *tb2;
+
+    /* remove the TB from the physical hash list */
+    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    h = tb_phys_hash_func(phys_pc);
+    tb_remove(&tb_phys_hash[h], tb,
+              offsetof(TranslationBlock, phys_hash_next));
+
+    /* remove the TB from the page list, except for the page 'page_addr' */
+    if (tb->page_addr[0] != page_addr) {
+        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) { /* -1: no second page */
+        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
+        tb_page_remove(&p->first_tb, tb);
+        invalidate_page_bitmap(p);
+    }
+
+    tb_invalidated_flag = 1;
+
+    /* remove the TB from each CPU's tb_jmp_cache */
+    h = tb_jmp_cache_hash_func(tb->pc);
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (env->tb_jmp_cache[h] == tb) {
+            env->tb_jmp_cache[h] = NULL;
+        }
+    }
+
+    /* suppress this TB from the two jump lists */
+    tb_jmp_remove(tb, 0);
+    tb_jmp_remove(tb, 1);
+
+    /* suppress any remaining jumps to this TB */
+    tb1 = tb->jmp_first;
+    for (;;) {
+        n1 = (uintptr_t)tb1 & 3;
+        if (n1 == 2) {
+            break; /* tag 2 terminates the list of incoming jumps */
+        }
+        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+        tb2 = tb1->jmp_next[n1]; /* fetch the successor before clearing the link */
+        tb_reset_jump(tb1, n1);
+        tb1->jmp_next[n1] = NULL;
+        tb1 = tb2;
+    }
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
+
+    tb_phys_invalidate_count++;
+}
+
+static inline void set_bits(uint8_t *tab, int start, int len)
+{
+    int end, mask, end1;
+
+    end = start + len; /* exclusive end bit index; bits [start, end) get set */
+    tab += start >> 3; /* byte containing bit 'start' */
+    mask = 0xff << (start & 7); /* bits from 'start' upward; only the low byte is stored */
+    if ((start & ~7) == (end & ~7)) { /* start and end fall in the same byte */
+        if (start < end) {
+            mask &= ~(0xff << (end & 7)); /* drop the bits at and above 'end' */
+            *tab |= mask;
+        }
+    } else {
+        *tab++ |= mask; /* partial first byte */
+        start = (start + 8) & ~7; /* next byte boundary */
+        end1 = end & ~7;
+        while (start < end1) {
+            *tab++ = 0xff; /* whole bytes in the middle */
+            start += 8;
+        }
+        if (start < end) {
+            mask = ~(0xff << (end & 7)); /* partial last byte: bits below 'end' */
+            *tab |= mask;
+        }
+    }
+}
+
+static void build_page_bitmap(PageDesc *p)
+{
+    int n, tb_start, tb_end;
+    TranslationBlock *tb;
+
+    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8); /* one bit per byte of the page, all clear */
+
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3; /* tag: 0 = this is the TB's first page, else its second */
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may run past the end of the page, so it is
+               clamped to TARGET_PAGE_SIZE below */
+            tb_start = tb->pc & ~TARGET_PAGE_MASK;
+            tb_end = tb_start + tb->size;
+            if (tb_end > TARGET_PAGE_SIZE) {
+                tb_end = TARGET_PAGE_SIZE;
+            }
+        } else {
+            tb_start = 0; /* the TB's tail starts at the beginning of this page */
+            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
+        tb = tb->page_next[n];
+    }
+}
+
+TranslationBlock *tb_gen_code(CPUArchState *env,
+                              target_ulong pc, target_ulong cs_base,
+                              int flags, int cflags)
+{
+    TranslationBlock *tb;
+    uint8_t *tc_ptr;
+    tb_page_addr_t phys_pc, phys_page2;
+    target_ulong virt_page2;
+    int code_gen_size;
+
+    phys_pc = get_page_addr_code(env, pc);
+    tb = tb_alloc(pc);
+    if (!tb) {
+        /* flush must be done */
+        tb_flush(env);
+        /* cannot fail at this point */
+        tb = tb_alloc(pc);
+        /* Don't forget to invalidate previous TB info.  */
+        tb_invalidated_flag = 1;
+    }
+    tc_ptr = code_gen_ptr; /* the new TB's code starts at the current write position */
+    tb->tc_ptr = tc_ptr;
+    tb->cs_base = cs_base;
+    tb->flags = flags;
+    tb->cflags = cflags;
+    cpu_gen_code(env, tb, &code_gen_size);
+    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
+                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); /* round up to CODE_GEN_ALIGN */
+
+    /* check next page if needed */
+    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK; /* page of the TB's last byte */
+    phys_page2 = -1; /* -1: the TB lies within a single page */
+    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
+        phys_page2 = get_page_addr_code(env, virt_page2);
+    }
+    tb_link_page(tb, phys_pc, phys_page2);
+    return tb;
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end may refer to *different* physical pages.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
+                              int is_cpu_write_access)
+{
+    while (start < end) {
+        tb_invalidate_phys_page_range(start, end, is_cpu_write_access); /* scans only the page of 'start' */
+        start &= TARGET_PAGE_MASK;
+        start += TARGET_PAGE_SIZE; /* advance to the beginning of the next page */
+    }
+}
+
+/*
+ * Invalidate all TBs which intersect with the target physical address range
+ * [start;end[. NOTE: start and end must refer to the *same* physical page.
+ * 'is_cpu_write_access' should be true if called from a real cpu write
+ * access: the virtual CPU will exit the current TB if code is modified inside
+ * this TB.
+ */
+void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
+                                   int is_cpu_write_access)
+{
+    TranslationBlock *tb, *tb_next, *saved_tb;
+    CPUArchState *env = cpu_single_env;
+    tb_page_addr_t tb_start, tb_end;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    int current_tb_not_found = is_cpu_write_access;
+    TranslationBlock *current_tb = NULL;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif /* TARGET_HAS_PRECISE_SMC */
+
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return; /* no descriptor for this page, hence no TBs on it */
+    }
+    if (!p->code_bitmap &&
+        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
+        is_cpu_write_access) {
+        /* build code bitmap */
+        build_page_bitmap(p);
+    }
+
+    /* we remove all the TBs in the range [start, end[ */
+    /* XXX: see if in some cases it could be faster to invalidate all
+       the code */
+    tb = p->first_tb;
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3; /* tag: 0 = this is the TB's first page, else its second */
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+        tb_next = tb->page_next[n]; /* read before tb is unlinked below */
+        /* NOTE: this is subtle as a TB may span two physical pages */
+        if (n == 0) {
+            /* NOTE: tb_end may be after the end of the page, but
+               it is not a problem */
+            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + tb->size;
+        } else {
+            tb_start = tb->page_addr[1];
+            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+        }
+        if (!(tb_end <= start || tb_start >= end)) { /* [tb_start, tb_end) overlaps [start, end) */
+#ifdef TARGET_HAS_PRECISE_SMC
+            if (current_tb_not_found) {
+                current_tb_not_found = 0; /* look the current TB up at most once */
+                current_tb = NULL;
+                if (env->mem_io_pc) {
+                    /* now we have a real cpu fault */
+                    current_tb = tb_find_pc(env->mem_io_pc);
+                }
+            }
+            if (current_tb == tb &&
+                (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                its execution. We could be more precise by checking
+                that the modification is after the current PC, but it
+                would require a specialized function to partially
+                restore the CPU state */
+
+                current_tb_modified = 1;
+                cpu_restore_state_from_tb(current_tb, env, env->mem_io_pc);
+                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                     &current_flags);
+            }
+#endif /* TARGET_HAS_PRECISE_SMC */
+            /* we need to do that to handle the case where a signal
+               occurs while doing tb_phys_invalidate() */
+            saved_tb = NULL;
+            if (env) {
+                saved_tb = env->current_tb;
+                env->current_tb = NULL;
+            }
+            tb_phys_invalidate(tb, -1); /* -1: unlink from every page the TB touches */
+            if (env) {
+                env->current_tb = saved_tb;
+                if (env->interrupt_request && env->current_tb) {
+                    cpu_interrupt(env, env->interrupt_request);
+                }
+            }
+        }
+        tb = tb_next;
+    }
+#if !defined(CONFIG_USER_ONLY)
+    /* if no code remaining, no need to continue to use slow writes */
+    if (!p->first_tb) {
+        invalidate_page_bitmap(p);
+        if (is_cpu_write_access) {
+            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
+        }
+    }
+#endif
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); /* cflags == 1 */
+        cpu_resume_from_signal(env, NULL);
+    }
+#endif
+}
+
+/* len must be <= 8 and start must be a multiple of len */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
+{
+    PageDesc *p;
+    int offset, b;
+
+#if 0
+    if (1) {
+        qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
+                  cpu_single_env->mem_io_vaddr, len,
+                  cpu_single_env->eip,
+                  cpu_single_env->eip +
+                  (intptr_t)cpu_single_env->segs[R_CS].base);
+    }
+#endif
+    p = page_find(start >> TARGET_PAGE_BITS);
+    if (!p) {
+        return; /* no descriptor for this page, nothing to invalidate */
+    }
+    if (p->code_bitmap) {
+        offset = start & ~TARGET_PAGE_MASK; /* byte offset of the write within the page */
+        b = p->code_bitmap[offset >> 3] >> (offset & 7); /* bitmap bits from 'offset' on, same byte */
+        if (b & ((1 << len) - 1)) { /* any of the 'len' written bytes is marked as code */
+            goto do_invalidate;
+        }
+    } else {
+    do_invalidate: /* also reached by the goto above; without a bitmap, always invalidate */
+        tb_invalidate_phys_page_range(start, start + len, 1);
+    }
+}
+
+#if !defined(CONFIG_SOFTMMU)
+static void tb_invalidate_phys_page(tb_page_addr_t addr,
+                                    uintptr_t pc, void *puc)
+{
+    TranslationBlock *tb;
+    PageDesc *p;
+    int n;
+#ifdef TARGET_HAS_PRECISE_SMC
+    TranslationBlock *current_tb = NULL;
+    CPUArchState *env = cpu_single_env;
+    int current_tb_modified = 0;
+    target_ulong current_pc = 0;
+    target_ulong current_cs_base = 0;
+    int current_flags = 0;
+#endif
+
+    addr &= TARGET_PAGE_MASK; /* keep only the page part of the address */
+    p = page_find(addr >> TARGET_PAGE_BITS);
+    if (!p) {
+        return; /* no descriptor for this page, hence no TBs on it */
+    }
+    tb = p->first_tb;
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (tb && pc != 0) {
+        current_tb = tb_find_pc(pc); /* TB whose generated code contains host 'pc' */
+    }
+#endif
+    while (tb != NULL) {
+        n = (uintptr_t)tb & 3; /* tag: which page_next[] slot links onward */
+        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
+#ifdef TARGET_HAS_PRECISE_SMC
+        if (current_tb == tb &&
+            (current_tb->cflags & CF_COUNT_MASK) != 1) {
+                /* If we are modifying the current TB, we must stop
+                   its execution. We could be more precise by checking
+                   that the modification is after the current PC, but it
+                   would require a specialized function to partially
+                   restore the CPU state */
+
+            current_tb_modified = 1;
+            cpu_restore_state_from_tb(current_tb, env, pc);
+            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
+                                 &current_flags);
+        }
+#endif /* TARGET_HAS_PRECISE_SMC */
+        tb_phys_invalidate(tb, addr); /* passing addr leaves this page's list untouched ... */
+        tb = tb->page_next[n]; /* ... so the link can still be followed here */
+    }
+    p->first_tb = NULL; /* the whole list of this page is dropped at once */
+#ifdef TARGET_HAS_PRECISE_SMC
+    if (current_tb_modified) {
+        /* we generate a block containing just the instruction
+           modifying the memory. It will ensure that it cannot modify
+           itself */
+        env->current_tb = NULL;
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1); /* cflags == 1 */
+        cpu_resume_from_signal(env, puc);
+    }
+#endif
+}
+#endif
+
+/* add the tb to the target page's TB list (slot 'n') and protect the page if necessary */
+static inline void tb_alloc_page(TranslationBlock *tb,
+                                 unsigned int n, tb_page_addr_t page_addr)
+{
+    PageDesc *p;
+#ifndef CONFIG_USER_ONLY
+    bool page_already_protected;
+#endif
+
+    tb->page_addr[n] = page_addr;
+    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1); /* alloc == 1: create missing levels */
+    tb->page_next[n] = p->first_tb; /* push the TB at the head of the page's list */
+#ifndef CONFIG_USER_ONLY
+    page_already_protected = p->first_tb != NULL;
+#endif
+    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n); /* tag the link with slot index n */
+    invalidate_page_bitmap(p); /* any existing bitmap does not include the new TB */
+
+#if defined(TARGET_HAS_SMC) || 1
+
+#if defined(CONFIG_USER_ONLY)
+    if (p->flags & PAGE_WRITE) {
+        target_ulong addr;
+        PageDesc *p2;
+        int prot;
+
+        /* force the host page as non writable (writes will have a
+           page fault + mprotect overhead) */
+        page_addr &= qemu_host_page_mask;
+        prot = 0;
+        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
+            addr += TARGET_PAGE_SIZE) {
+
+            p2 = page_find(addr >> TARGET_PAGE_BITS);
+            if (!p2) {
+                continue;
+            }
+            prot |= p2->flags; /* accumulate the flags of every target page in the host page */
+            p2->flags &= ~PAGE_WRITE;
+          }
+        mprotect(g2h(page_addr), qemu_host_page_size,
+                 (prot & PAGE_BITS) & ~PAGE_WRITE); /* return value is not checked */
+#ifdef DEBUG_TB_INVALIDATE
+        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
+               page_addr);
+#endif
+    }
+#else
+    /* if some code is already present, then the pages are already
+       protected. So we handle the case where only the first TB is
+       allocated in a physical page */
+    if (!page_already_protected) {
+        tlb_protect_code(page_addr);
+    }
+#endif
+
+#endif /* TARGET_HAS_SMC */
+}
+
+/* add a new TB and link it to the physical page tables. phys_page2 is
+   (-1) to indicate that only one page contains the TB. */
+static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+                         tb_page_addr_t phys_page2)
+{
+    unsigned int h;
+    TranslationBlock **ptb;
+
+    /* Grab the mmap lock to stop another thread invalidating this TB
+       before we are done.  */
+    mmap_lock(); /* expands to a no-op unless CONFIG_USER_ONLY is defined */
+    /* add in the physical hash table */
+    h = tb_phys_hash_func(phys_pc);
+    ptb = &tb_phys_hash[h];
+    tb->phys_hash_next = *ptb; /* push at the head of the bucket */
+    *ptb = tb;
+
+    /* add in the page list */
+    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
+    if (phys_page2 != -1) {
+        tb_alloc_page(tb, 1, phys_page2);
+    } else {
+        tb->page_addr[1] = -1;
+    }
+
+    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* empty incoming-jump list: tag 2 on itself */
+    tb->jmp_next[0] = NULL;
+    tb->jmp_next[1] = NULL;
+
+    /* init original jump addresses */
+    if (tb->tb_next_offset[0] != 0xffff) { /* presumably 0xffff means "slot unused" -- confirm */
+        tb_reset_jump(tb, 0);
+    }
+    if (tb->tb_next_offset[1] != 0xffff) {
+        tb_reset_jump(tb, 1);
+    }
+
+#ifdef DEBUG_TB_CHECK
+    tb_page_check();
+#endif
+    mmap_unlock();
+}
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* check whether the given host address is in the TCG generated code buffer or not */
+bool is_tcg_gen_code(uintptr_t tc_ptr)
+{
+    /* This can be called during code generation, code_gen_buffer_max_size
+       is used instead of code_gen_ptr for upper boundary checking */
+    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
+            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
+}
+#endif
+
+/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
+   tb[1].tc_ptr. Return NULL if tc_ptr is outside the generated code */
+static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+{
+    int m_min, m_max, m;
+    uintptr_t v;
+    TranslationBlock *tb;
+
+    if (nb_tbs <= 0) {
+        return NULL;
+    }
+    if (tc_ptr < (uintptr_t)code_gen_buffer ||
+        tc_ptr >= (uintptr_t)code_gen_ptr) {
+        return NULL; /* not inside the code generated so far */
+    }
+    /* binary search (cf Knuth) */
+    m_min = 0;
+    m_max = nb_tbs - 1;
+    while (m_min <= m_max) {
+        m = (m_min + m_max) >> 1;
+        tb = &tbs[m];
+        v = (uintptr_t)tb->tc_ptr;
+        if (v == tc_ptr) {
+            return tb; /* tc_ptr is exactly the start of this TB's code */
+        } else if (tc_ptr < v) {
+            m_max = m - 1;
+        } else {
+            m_min = m + 1;
+        }
+    }
+    return &tbs[m_max]; /* last TB whose code starts below tc_ptr */
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb);
+
+static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
+{
+    TranslationBlock *tb1, *tb_next, **ptb;
+    unsigned int n1;
+
+    tb1 = tb->jmp_next[n];
+    if (tb1 != NULL) { /* NULL: jump slot n of tb is not chained */
+        /* find head of list */
+        for (;;) {
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == 2) {
+                break; /* tag 2 marks the TB that owns this jump list */
+            }
+            tb1 = tb1->jmp_next[n1];
+        }
+        /* we are now sure that tb jumps to tb1 */
+        tb_next = tb1;
+
+        /* remove tb from the jmp_first list */
+        ptb = &tb_next->jmp_first;
+        for (;;) {
+            tb1 = *ptb;
+            n1 = (uintptr_t)tb1 & 3;
+            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
+            if (n1 == n && tb1 == tb) {
+                break; /* *ptb is the link that points at tb(n) */
+            }
+            ptb = &tb1->jmp_next[n1];
+        }
+        *ptb = tb->jmp_next[n];
+        tb->jmp_next[n] = NULL;
+
+        /* suppress the jump to next tb in generated code */
+        tb_reset_jump(tb, n);
+
+        /* suppress jumps in the tb on which we could have jumped */
+        tb_reset_jump_recursive(tb_next);
+    }
+}
+
+static void tb_reset_jump_recursive(TranslationBlock *tb)
+{
+    tb_reset_jump_recursive2(tb, 0); /* unchain jump slot 0 and the TBs reachable through it */
+    tb_reset_jump_recursive2(tb, 1); /* same for jump slot 1 */
+}
+
+#if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
+void tb_invalidate_phys_addr(hwaddr addr)
+{
+    ram_addr_t ram_addr;
+    MemoryRegionSection *section;
+
+    section = phys_page_find(address_space_memory.dispatch,
+                             addr >> TARGET_PAGE_BITS);
+    if (!(memory_region_is_ram(section->mr)
+          || (section->mr->rom_device && section->mr->readable))) {
+        return; /* neither RAM nor a readable ROM device: nothing is invalidated */
+    }
+    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
+        + memory_region_section_addr(section, addr);
+    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); /* one byte, not a cpu write access */
+}
+#endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
+
+void cpu_unlink_tb(CPUArchState *env)
+{
+    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
+       problem and hope the cpu will stop of its own accord.  For userspace
+       emulation this often isn't actually as bad as it sounds.  Often
+       signals are used primarily to interrupt blocking syscalls.  */
+    TranslationBlock *tb;
+    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED; /* one lock shared by all callers */
+
+    spin_lock(&interrupt_lock);
+    tb = env->current_tb;
+    /* if the cpu is currently executing code, we must unlink it and
+       all the potentially executing TB */
+    if (tb) {
+        env->current_tb = NULL;
+        tb_reset_jump_recursive(tb);
+    }
+    spin_unlock(&interrupt_lock);
+}
+
+void tb_check_watchpoint(CPUArchState *env)
+{
+    TranslationBlock *tb;
+
+    tb = tb_find_pc(env->mem_io_pc); /* TB whose generated code contains mem_io_pc */
+    if (!tb) {
+        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
+                  (void *)env->mem_io_pc);
+    }
+    cpu_restore_state_from_tb(tb, env, env->mem_io_pc);
+    tb_phys_invalidate(tb, -1); /* -1: unlink from every page the TB touches */
+}
+
+#ifndef CONFIG_USER_ONLY
+/* mask must never be zero, except for A20 change call */
+static void tcg_handle_interrupt(CPUArchState *env, int mask)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+    int old_mask;
+
+    old_mask = env->interrupt_request; /* kept to detect newly raised bits below */
+    env->interrupt_request |= mask;
+
+    /*
+     * If called from iothread context, wake the target cpu in
+     * case it's halted.
+     */
+    if (!qemu_cpu_is_self(cpu)) {
+        qemu_cpu_kick(cpu);
+        return; /* nothing else is done when called from another thread */
+    }
+
+    if (use_icount) {
+        env->icount_decr.u16.high = 0xffff;
+        if (!can_do_io(env)
+            && (mask & ~old_mask) != 0) { /* a new interrupt bit was raised outside an I/O function */
+            cpu_abort(env, "Raised interrupt while not in I/O function");
+        }
+    } else {
+        cpu_unlink_tb(env);
+    }
+}
+
+CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
+
+/* in deterministic execution mode, instructions doing device I/Os
+   must be at the end of the TB; 'retaddr' is a host address inside the TB */
+void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
+{
+    TranslationBlock *tb;
+    uint32_t n, cflags;
+    target_ulong pc, cs_base;
+    uint64_t flags;
+
+    tb = tb_find_pc(retaddr);
+    if (!tb) {
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
+                  (void *)retaddr);
+    }
+    n = env->icount_decr.u16.low + tb->icount; /* sampled before the state restore changes .low */
+    cpu_restore_state_from_tb(tb, env, retaddr);
+    /* Calculate how many instructions had been executed before the fault
+       occurred.  */
+    n = n - env->icount_decr.u16.low;
+    /* Generate a new TB ending on the I/O insn.  */
+    n++;
+    /* On MIPS and SH, delay slot instructions can only be restarted if
+       they were already the first instruction in the TB.  If this is not
+       the first instruction in a TB then re-execute the preceding
+       branch.  */
+#if defined(TARGET_MIPS)
+    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
+        env->active_tc.PC -= 4;
+        env->icount_decr.u16.low++;
+        env->hflags &= ~MIPS_HFLAG_BMASK;
+    }
+#elif defined(TARGET_SH4)
+    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+            && n > 1) {
+        env->pc -= 2;
+        env->icount_decr.u16.low++;
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+    }
+#endif
+    /* This should never happen.  */
+    if (n > CF_COUNT_MASK) {
+        cpu_abort(env, "TB too big during recompile");
+    }
+
+    cflags = n | CF_LAST_IO; /* instruction count in the low bits plus the CF_LAST_IO flag */
+    pc = tb->pc; /* copy what is needed before the TB is invalidated */
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    tb_phys_invalidate(tb, -1);
+    /* FIXME: In theory this could raise an exception.  In practice
+       we have already translated the block once so it's probably ok.  */
+    tb_gen_code(env, pc, cs_base, flags, cflags);
+    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
+       the first in the TB) then we end up generating a whole new TB and
+       repeating the fault, which is horribly inefficient.
+       Better would be to execute just this insn uncached, or generate a
+       second new TB.  */
+    cpu_resume_from_signal(env, NULL);
+}
+
+/* Zero the jump cache slots hashed from addr's page and the one before.  */
+void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
+{
+    const size_t span = TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *);
+    unsigned int slot;
+
+    /* Discard jump cache entries for any tb which might potentially
+       overlap the flushed page: preceding page first, then the page.  */
+    slot = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
+    memset(&env->tb_jmp_cache[slot], 0, span);
+
+    slot = tb_jmp_cache_hash_page(addr);
+    memset(&env->tb_jmp_cache[slot], 0, span);
+}
+
+/* Print translation buffer usage and TB statistics through cpu_fprintf.  */
+void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
+{
+    int target_code_size = 0, max_target_code_size = 0;
+    int direct_jmp_count = 0, direct_jmp2_count = 0, cross_page = 0;
+    int i;
+
+    /* Accumulate the per-TB totals over tbs[0 .. nb_tbs).  */
+    for (i = 0; i < nb_tbs; i++) {
+        TranslationBlock *tb = &tbs[i];
+
+        target_code_size += tb->size;
+        if (max_target_code_size < tb->size) {
+            max_target_code_size = tb->size;
+        }
+        if (tb->page_addr[1] != -1) {
+            cross_page++;
+        }
+        if (tb->tb_next_offset[0] == 0xffff) {
+            continue;
+        }
+        direct_jmp_count++;
+        if (tb->tb_next_offset[1] != 0xffff) {
+            direct_jmp2_count++;
+        }
+    }
+
+    /* XXX: avoid using doubles ? */
+    cpu_fprintf(f, "Translation buffer state:\n");
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
+                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+    cpu_fprintf(f, "TB count            %d/%d\n",
+                nb_tbs, code_gen_max_blocks);
+    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
+                nb_tbs ? target_code_size / nb_tbs : 0,
+                max_target_code_size);
+    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
+                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
+                target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
+                / target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
+                cross_page,
+                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
+                direct_jmp_count,
+                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                direct_jmp2_count,
+                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+    cpu_fprintf(f, "\nStatistics:\n");
+    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
+    tcg_dump_info(f, cpu_fprintf);
+}
+
+#else /* CONFIG_USER_ONLY */
+
+void cpu_interrupt(CPUArchState *env, int mask)
+{
+    env->interrupt_request |= mask;   /* add mask to the pending requests */
+    cpu_unlink_tb(env);               /* unlink env->current_tb, if any */
+}
+
+/*
+ * State for walking guest process memory "regions" one by one
+ * and calling callback function 'fn' for each region.
+ */
+struct walk_memory_regions_data {
+    walk_memory_regions_fn fn;  /* callback invoked once per region */
+    void *priv;                 /* opaque pointer passed through to fn */
+    uintptr_t start;            /* start of the open region, -1ul if none */
+    int prot;                   /* protection flags of the open region */
+};
+
+/* Report the open region (if any) as ending at 'end', then begin the next.  */
+static int walk_memory_regions_end(struct walk_memory_regions_data *data,
+                                   abi_ulong end, int new_prot)
+{
+    const int open = data->start != -1ul;
+    int status = open ? data->fn(data->priv, data->start, end, data->prot) : 0;
+
+    if (status != 0) {
+        return status;
+    }
+    /* A zero new_prot means no region is open after this call.  */
+    data->start = new_prot ? end : -1ul;
+    data->prot = new_prot;
+    return 0;
+}
+
+/* Walk the table node at *lp, whose entries are OR-ed onto 'base'.  */
+static int walk_memory_regions_1(struct walk_memory_regions_data *data,
+                                 abi_ulong base, int level, void **lp)
+{
+    int idx;
+
+    if (*lp == NULL) {
+        /* Nothing allocated below here: end any region still open.  */
+        return walk_memory_regions_end(data, base, 0);
+    }
+
+    if (level != 0) {
+        void **children = *lp;
+        const int shift = TARGET_PAGE_BITS + L2_BITS * level;
+
+        for (idx = 0; idx < L2_SIZE; ++idx) {
+            abi_ulong child_base = base | ((abi_ulong)idx << shift);
+            int rc = walk_memory_regions_1(data, child_base, level - 1,
+                                           children + idx);
+            if (rc != 0) {
+                return rc;
+            }
+        }
+    } else {
+        PageDesc *pd = *lp;
+
+        for (idx = 0; idx < L2_SIZE; ++idx) {
+            int prot = pd[idx].flags;
+            if (prot != data->prot) {
+                int rc = walk_memory_regions_end(data,
+                             base | (idx << TARGET_PAGE_BITS), prot);
+                if (rc != 0) {
+                    return rc;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+/* Call fn for each guest memory region; stops on a non-zero return.  */
+int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
+{
+    struct walk_memory_regions_data data;
+    uintptr_t i;
+
+    data.fn = fn;
+    data.priv = priv;
+    data.start = -1ul;
+    data.prot = 0;
+    for (i = 0; i < V_L1_SIZE; i++) {
+        /* Slot i sits TARGET_PAGE_BITS above the table-index shift.  */
+        abi_ulong base = (abi_ulong)i << (V_L1_SHIFT + TARGET_PAGE_BITS);
+        int rc = walk_memory_regions_1(&data, base,
+                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+        if (rc != 0) {
+            return rc;
+        }
+    }
+    return walk_memory_regions_end(&data, 0, 0);
+}
+
+/* walk_memory_regions() callback: print one "start-end size prot" line.  */
+static int dump_region(void *priv, abi_ulong start,
+    abi_ulong end, unsigned long prot)
+{
+    FILE *out = priv;
+    const char r = (prot & PAGE_READ) ? 'r' : '-';
+    const char w = (prot & PAGE_WRITE) ? 'w' : '-';
+    const char x = (prot & PAGE_EXEC) ? 'x' : '-';
+
+    (void) fprintf(out, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
+        " "TARGET_ABI_FMT_lx" %c%c%c\n",
+        start, end, end - start, r, w, x);
+    return 0;
+}
+
+/* dump memory mappings */
+void page_dump(FILE *f)
+{
+    /* Column titles first, then dump_region() prints each region.  */
+    (void) fprintf(f, "%-8s %-8s %-8s %s\n", "start", "end", "size", "prot");
+    walk_memory_regions(f, dump_region);
+}
+
+/* Return the flags of address's page, or 0 if page_find() has no entry.  */
+int page_get_flags(target_ulong address)
+{
+    const PageDesc *desc = page_find(address >> TARGET_PAGE_BITS);
+
+    if (desc == NULL) {
+        return 0;
+    }
+    return desc->flags;
+}
+
+/* Set 'flags' on every page of [start, end), invalidating translated code
+   on pages that become writable.  PAGE_WRITE_ORG is added automatically
+   whenever PAGE_WRITE is requested.  The mmap_lock should already be held.  */
+void page_set_flags(target_ulong start, target_ulong end, int flags)
+{
+    target_ulong page, remaining;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+    assert(start < end);
+
+    start &= TARGET_PAGE_MASK;
+    end = TARGET_PAGE_ALIGN(end);
+    if (flags & PAGE_WRITE) {
+        flags |= PAGE_WRITE_ORG;
+    }
+    /* The loop counts 'remaining' down to zero rather than comparing
+       against 'end' (presumably so an aligned end of 0 still works).  */
+    page = start;
+    remaining = end - start;
+    while (remaining != 0) {
+        PageDesc *p = page_find_alloc(page >> TARGET_PAGE_BITS, 1);
+        const int newly_rw = !(p->flags & PAGE_WRITE) && (flags & PAGE_WRITE);
+
+        /* A page turning writable must drop the code translated from it.  */
+        if (newly_rw && p->first_tb) {
+            tb_invalidate_phys_page(page, 0, NULL);
+        }
+        p->flags = flags;
+        page += TARGET_PAGE_SIZE;
+        remaining -= TARGET_PAGE_SIZE;
+    }
+}
+
+/* Check that every page of [start, start + len) is valid and permits the
+   accesses requested in 'flags'.  Returns 0 if so, -1 otherwise.  */
+int page_check_range(target_ulong start, target_ulong len, int flags)
+{
+    PageDesc *p;
+    target_ulong end;
+    target_ulong addr;
+
+    /* This function should never be called with addresses outside the
+       guest address space.  If this assert fires, it probably indicates
+       a missing call to h2g_valid.  */
+#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
+    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+#endif
+
+    if (len == 0) {
+        return 0;
+    }
+    if (start + len - 1 < start) {
+        /* We've wrapped around.  */
+        return -1;
+    }
+
+    /* must do before we lose bits in the next step */
+    end = TARGET_PAGE_ALIGN(start + len);
+    start = start & TARGET_PAGE_MASK;
+
+    for (addr = start, len = end - start;
+         len != 0;
+         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
+        p = page_find(addr >> TARGET_PAGE_BITS);
+        if (!p || !(p->flags & PAGE_VALID)) {
+            return -1;
+        }
+
+        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
+            return -1;
+        }
+        if (flags & PAGE_WRITE) {
+            if (!(p->flags & PAGE_WRITE_ORG)) {
+                return -1;
+            }
+            /* unprotect the page if it was put read-only because it
+               contains translated code */
+            if (!(p->flags & PAGE_WRITE)) {
+                if (!page_unprotect(addr, 0, NULL)) {
+                    return -1;
+                }
+            }
+            /* No early return here: every remaining page of the range
+               has to pass the same checks.  */
+        }
+    }
+    return 0;
+}
+
+/* called from signal handler: invalidate the code and unprotect the
+   page. Returns 1 if the fault was successfully handled, else 0. */
+int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
+{
+    unsigned int prot;
+    PageDesc *p;
+    target_ulong host_start, host_end, addr;
+
+    /* Technically this isn't safe inside a signal handler.  However we
+       know this only ever happens in a synchronous SEGV handler, so in
+       practice it seems to be ok.  */
+    mmap_lock();
+
+    p = page_find(address >> TARGET_PAGE_BITS);
+    if (!p) {
+        mmap_unlock();
+        return 0;
+    }
+
+    /* if the page was really writable, then we change its
+       protection back to writable */
+    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
+        host_start = address & qemu_host_page_mask;
+        host_end = host_start + qemu_host_page_size;
+        /* Visit each target page within [host_start, host_end).  */
+        prot = 0;
+        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
+            p = page_find(addr >> TARGET_PAGE_BITS);
+            p->flags |= PAGE_WRITE;
+            prot |= p->flags;
+
+            /* and since the content will be modified, we must invalidate
+               the corresponding translated code. */
+            tb_invalidate_phys_page(addr, pc, puc);
+#ifdef DEBUG_TB_CHECK
+            tb_invalidate_check(addr);
+#endif
+        }
+        mprotect((void *)g2h(host_start), qemu_host_page_size,
+                 prot & PAGE_BITS);
+        /* NOTE(review): the mprotect() result is not checked here.  */
+        mmap_unlock();
+        return 1;
+    }
+    mmap_unlock();
+    return 0;
+}
+#endif /* CONFIG_USER_ONLY */
diff --git a/translate-all.h b/translate-all.h
new file mode 100644
index 0000000000..b181fb48ad
--- /dev/null
+++ b/translate-all.h
@@ -0,0 +1,34 @@
+/*
+ *  Translated block handling
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef TRANSLATE_ALL_H
+#define TRANSLATE_ALL_H
+
+/* Size of the L2 (and L3, etc) page tables.  */
+#define L2_BITS 10
+#define L2_SIZE (1 << L2_BITS)
+/* Physical page-number bits divided by L2_BITS, rounded up.  */
+#define P_L2_LEVELS \
+    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
+
+/* translate-all.c */
+void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
+void cpu_unlink_tb(CPUArchState *env);
+void tb_check_watchpoint(CPUArchState *env);
+
+#endif /* TRANSLATE_ALL_H */
diff --git a/ui/vnc-enc-tight.c b/ui/vnc-enc-tight.c
index 9ae4cabffc..62d0fde77f 100644
--- a/ui/vnc-enc-tight.c
+++ b/ui/vnc-enc-tight.c
@@ -1212,7 +1212,7 @@ static int send_jpeg_rect(VncState *vs, int x, int y, int w, int h, int quality)
     buf = (uint8_t *)pixman_image_get_data(linebuf);
     row[0] = buf;
     for (dy = 0; dy < h; dy++) {
-        qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy);
+        qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy);
         jpeg_write_scanlines(&cinfo, row, 1);
     }
     qemu_pixman_image_unref(linebuf);
@@ -1356,7 +1356,7 @@ static int send_png_rect(VncState *vs, int x, int y, int w, int h,
         if (color_type == PNG_COLOR_TYPE_PALETTE) {
             memcpy(buf, vs->tight.tight.buffer + (dy * w), w);
         } else {
-            qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, dy);
+            qemu_pixman_linebuf_fill(linebuf, vs->vd->server, w, x, y + dy);
         }
         png_write_row(png_ptr, buf);
     }
diff --git a/ui/vnc.c b/ui/vnc.c
index ba303626ad..04afcffc52 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2569,7 +2569,7 @@ static int vnc_refresh_server_surface(VncDisplay *vd)
             uint8_t *server_ptr;
 
             if (vd->guest.format != VNC_SERVER_FB_FORMAT) {
-                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, y);
+                qemu_pixman_linebuf_fill(tmpbuf, vd->guest.fb, width, 0, y);
                 guest_ptr = (uint8_t *)pixman_image_get_data(tmpbuf);
             } else {
                 guest_ptr = guest_row;
diff --git a/user-exec.c b/user-exec.c
index ef9b1727b3..1185cb03c8 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -81,7 +81,6 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
                                     int is_write, sigset_t *old_set,
                                     void *puc)
 {
-    TranslationBlock *tb;
     int ret;
 
 #if defined(DEBUG_SIGNAL)
@@ -104,12 +103,7 @@ static inline int handle_cpu_signal(uintptr_t pc, unsigned long address,
         return 1; /* the MMU fault was handled without causing real CPU fault */
     }
     /* now we have a real cpu fault */
-    tb = tb_find_pc(pc);
-    if (tb) {
-        /* the PC is inside the translated code. It means that we have
-           a virtual CPU fault */
-        cpu_restore_state(tb, cpu_single_env, pc);
-    }
+    cpu_restore_state(cpu_single_env, pc);
 
     /* we restore the process signal mask as the sigreturn should
        do it (XXX: use sigsetjmp) */
diff --git a/vl.c b/vl.c
index a3ab3841a7..3ebf01f8f1 100644
--- a/vl.c
+++ b/vl.c
@@ -886,9 +886,9 @@ static int cleanup_add_fd(QemuOpts *opts, void *opaque)
 
 static int drive_init_func(QemuOpts *opts, void *opaque)
 {
-    int *use_scsi = opaque;
+    BlockInterfaceType *block_default_type = opaque;
 
-    return drive_init(opts, *use_scsi) == NULL;
+    return drive_init(opts, *block_default_type) == NULL;
 }
 
 static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
@@ -899,16 +899,11 @@ static int drive_enable_snapshot(QemuOpts *opts, void *opaque)
     return 0;
 }
 
-static void default_drive(int enable, int snapshot, int use_scsi,
-                          BlockInterfaceType type, int index,
-                          const char *optstr)
+static void default_drive(int enable, int snapshot, BlockInterfaceType type,
+                          int index, const char *optstr)
 {
     QemuOpts *opts;
 
-    if (type == IF_DEFAULT) {
-        type = use_scsi ? IF_SCSI : IF_IDE;
-    }
-
     if (!enable || drive_get_by_index(type, index)) {
         return;
     }
@@ -917,7 +912,7 @@ static void default_drive(int enable, int snapshot, int use_scsi,
     if (snapshot) {
         drive_enable_snapshot(opts, NULL);
     }
-    if (!drive_init(opts, use_scsi)) {
+    if (!drive_init(opts, type)) {
         exit(1);
     }
 }
@@ -2001,7 +1996,7 @@ static int balloon_parse(const char *arg)
                 return  -1;
         } else {
             /* create empty opts */
-            opts = qemu_opts_create(qemu_find_opts("device"), NULL, 0, NULL);
+            opts = qemu_opts_create_nofail(qemu_find_opts("device"));
         }
         qemu_opt_set(opts, "driver", "virtio-balloon");
         return 0;
@@ -2251,14 +2246,14 @@ static int virtcon_parse(const char *devname)
         exit(1);
     }
 
-    bus_opts = qemu_opts_create(device, NULL, 0, NULL);
+    bus_opts = qemu_opts_create_nofail(device);
     if (arch_type == QEMU_ARCH_S390X) {
         qemu_opt_set(bus_opts, "driver", "virtio-serial-s390");
     } else {
         qemu_opt_set(bus_opts, "driver", "virtio-serial-pci");
     } 
 
-    dev_opts = qemu_opts_create(device, NULL, 0, NULL);
+    dev_opts = qemu_opts_create_nofail(device);
     qemu_opt_set(dev_opts, "driver", "virtconsole");
 
     snprintf(label, sizeof(label), "virtcon%d", index);
@@ -3110,8 +3105,7 @@ int main(int argc, char **argv, char **envp)
 
                 qemu_opt_set_bool(fsdev, "readonly",
                                 qemu_opt_get_bool(opts, "readonly", 0));
-                device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
-                                          NULL);
+                device = qemu_opts_create_nofail(qemu_find_opts("device"));
                 qemu_opt_set(device, "driver", "virtio-9p-pci");
                 qemu_opt_set(device, "fsdev",
                              qemu_opt_get(opts, "mount_tag"));
@@ -3131,8 +3125,7 @@ int main(int argc, char **argv, char **envp)
                 }
                 qemu_opt_set(fsdev, "fsdriver", "synth");
 
-                device = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
-                                          NULL);
+                device = qemu_opts_create_nofail(qemu_find_opts("device"));
                 qemu_opt_set(device, "driver", "virtio-9p-pci");
                 qemu_opt_set(device, "fsdev", "v_synth");
                 qemu_opt_set(device, "mount_tag", "v_synth");
@@ -3770,15 +3763,15 @@ int main(int argc, char **argv, char **envp)
     /* open the virtual block devices */
     if (snapshot)
         qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot, NULL, 0);
-    if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func, &machine->use_scsi, 1) != 0)
+    if (qemu_opts_foreach(qemu_find_opts("drive"), drive_init_func,
+                          &machine->block_default_type, 1) != 0) {
         exit(1);
+    }
 
-    default_drive(default_cdrom, snapshot, machine->use_scsi,
-                  IF_DEFAULT, 2, CDROM_OPTS);
-    default_drive(default_floppy, snapshot, machine->use_scsi,
-                  IF_FLOPPY, 0, FD_OPTS);
-    default_drive(default_sdcard, snapshot, machine->use_scsi,
-                  IF_SD, 0, SD_OPTS);
+    default_drive(default_cdrom, snapshot, machine->block_default_type, 2,
+                  CDROM_OPTS);
+    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
+    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);
 
     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);