aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block-migration.c7
-rw-r--r--block.c124
-rw-r--r--block/bochs.c2
-rw-r--r--block/cow.c2
-rw-r--r--block/mirror.c382
-rw-r--r--block/qcow.c2
-rw-r--r--block/qcow2.c2
-rw-r--r--block/qed.c2
-rw-r--r--block/vdi.c25
-rw-r--r--block/vmdk.c4
-rw-r--r--blockdev.c66
-rw-r--r--cpus.c8
-rw-r--r--docs/virtio-balloon-stats.txt104
-rw-r--r--hmp-commands.hx39
-rw-r--r--hmp.c60
-rw-r--r--hmp.h2
-rw-r--r--hw/boards.h1
-rw-r--r--hw/fw_cfg.c1
-rw-r--r--hw/ide/ahci.c98
-rw-r--r--hw/ide/ahci.h20
-rw-r--r--hw/ide/core.c33
-rw-r--r--hw/ide/ich.c14
-rw-r--r--hw/m25p80.c2
-rw-r--r--hw/pc.c40
-rw-r--r--hw/pc_piix.c26
-rw-r--r--hw/ppc/mac_newworld.c1
-rw-r--r--hw/ppc/mac_oldworld.c1
-rw-r--r--hw/s390x/Makefile.objs5
-rw-r--r--hw/s390x/css.c1277
-rw-r--r--hw/s390x/css.h99
-rw-r--r--hw/s390x/ipl.c2
-rw-r--r--hw/s390x/s390-virtio-bus.c (renamed from hw/s390-virtio-bus.c)16
-rw-r--r--hw/s390x/s390-virtio-bus.h (renamed from hw/s390-virtio-bus.h)12
-rw-r--r--hw/s390x/s390-virtio-ccw.c134
-rw-r--r--hw/s390x/s390-virtio-hcall.c2
-rw-r--r--hw/s390x/s390-virtio.c (renamed from hw/s390-virtio.c)131
-rw-r--r--hw/s390x/s390-virtio.h (renamed from hw/s390-virtio.h)6
-rw-r--r--hw/s390x/virtio-ccw.c960
-rw-r--r--hw/s390x/virtio-ccw.h98
-rw-r--r--hw/sun4m.c3
-rw-r--r--hw/sun4u.c1
-rw-r--r--hw/virtio-balloon.c175
-rw-r--r--hw/xilinx_ethlite.c23
-rw-r--r--include/block/block.h11
-rw-r--r--include/block/block_int.h10
-rw-r--r--include/qemu-common.h3
-rw-r--r--include/qemu/hbitmap.h208
-rw-r--r--include/qemu/host-utils.h26
-rw-r--r--include/qom/cpu.h17
-rw-r--r--include/qom/object.h8
-rw-r--r--include/sysemu/cpus.h7
-rw-r--r--include/sysemu/kvm.h6
-rw-r--r--kvm-all.c5
-rw-r--r--kvm-stub.c2
-rw-r--r--qapi-schema.json109
-rw-r--r--qemu-char.c203
-rw-r--r--qemu-options.hx10
-rw-r--r--qga/commands-posix.c20
-rw-r--r--qmp-commands.hx89
-rw-r--r--qom/cpu.c13
-rw-r--r--qom/object.c5
-rwxr-xr-xscripts/kvm/vmxcap1
-rw-r--r--target-alpha/cpu.c16
-rw-r--r--target-arm/cpu.c18
-rw-r--r--target-arm/helper.c6
-rw-r--r--target-i386/cpu.c389
-rw-r--r--target-i386/cpu.h12
-rw-r--r--target-i386/kvm.c32
-rw-r--r--target-i386/topology.h136
-rw-r--r--target-m68k/cpu.c20
-rw-r--r--target-m68k/helper.c6
-rw-r--r--target-openrisc/cpu.c36
-rw-r--r--target-openrisc/exception_helper.c2
-rw-r--r--target-openrisc/fpu_helper.c32
-rw-r--r--target-openrisc/int_helper.c2
-rw-r--r--target-openrisc/interrupt_helper.c2
-rw-r--r--target-openrisc/mmu.c6
-rw-r--r--target-openrisc/sys_helper.c4
-rw-r--r--target-ppc/kvm.c5
-rw-r--r--target-ppc/translate_init.c2
-rw-r--r--target-s390x/Makefile.objs2
-rw-r--r--target-s390x/cpu.h247
-rw-r--r--target-s390x/helper.c200
-rw-r--r--target-s390x/ioinst.c761
-rw-r--r--target-s390x/ioinst.h230
-rw-r--r--target-s390x/kvm.c244
-rw-r--r--target-unicore32/cpu.c26
-rw-r--r--target-unicore32/helper.c6
-rw-r--r--tests/.gitignore1
-rw-r--r--tests/Makefile12
-rwxr-xr-xtests/qemu-iotests/04181
-rw-r--r--tests/qemu-iotests/041.out4
-rw-r--r--tests/test-hbitmap.c401
-rw-r--r--tests/test-x86-cpuid.c110
-rw-r--r--trace-events30
-rw-r--r--util/Makefile.objs2
-rw-r--r--util/hbitmap.c401
-rw-r--r--vl.c52
98 files changed, 7577 insertions, 724 deletions
diff --git a/block-migration.c b/block-migration.c
index 6acf3e1682..9ac7de6d78 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -23,7 +23,8 @@
#include "sysemu/blockdev.h"
#include <assert.h>
-#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)
+#define BLOCK_SIZE (1 << 20)
+#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
@@ -254,7 +255,7 @@ static void set_dirty_tracking(int enable)
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bdrv_set_dirty_tracking(bmds->bs, enable);
+ bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
}
}
@@ -478,7 +479,7 @@ static int64_t get_remaining_dirty(void)
dirty += bdrv_get_dirty_count(bmds->bs);
}
- return dirty * BLOCK_SIZE;
+ return dirty << BDRV_SECTOR_BITS;
}
static void blk_mig_cleanup(void)
diff --git a/block.c b/block.c
index 6fa7c90144..ba67c0def2 100644
--- a/block.c
+++ b/block.c
@@ -1286,7 +1286,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
bs_dest->iostatus = bs_src->iostatus;
/* dirty bitmap */
- bs_dest->dirty_count = bs_src->dirty_count;
bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
/* job */
@@ -1674,10 +1673,10 @@ static void tracked_request_begin(BdrvTrackedRequest *req,
/**
* Round a region to cluster boundaries
*/
-static void round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -1719,8 +1718,8 @@ static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
* CoR read and write operations are atomic and guest writes cannot
* interleave between them.
*/
- round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
do {
retry = false;
@@ -2035,36 +2034,6 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
return ret;
}
-#define BITS_PER_LONG (sizeof(unsigned long) * 8)
-
-static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int dirty)
-{
- int64_t start, end;
- unsigned long val, idx, bit;
-
- start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
- end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- for (; start <= end; start++) {
- idx = start / BITS_PER_LONG;
- bit = start % BITS_PER_LONG;
- val = bs->dirty_bitmap[idx];
- if (dirty) {
- if (!(val & (1UL << bit))) {
- bs->dirty_count++;
- val |= 1UL << bit;
- }
- } else {
- if (val & (1UL << bit)) {
- bs->dirty_count--;
- val &= ~(1UL << bit);
- }
- }
- bs->dirty_bitmap[idx] = val;
- }
-}
-
/* Return < 0 if error. Important errors are:
-EIO generic I/O error (may happen for all errors)
-ENOMEDIUM No media inserted.
@@ -2216,8 +2185,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
cluster_sector_num, cluster_nb_sectors);
@@ -2863,8 +2832,9 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs)
if (bs->dirty_bitmap) {
info->has_dirty = true;
info->dirty = g_malloc0(sizeof(*info->dirty));
- info->dirty->count = bdrv_get_dirty_count(bs) *
- BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
+ info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
+ info->dirty->granularity =
+ ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
}
if (bs->drv) {
@@ -4173,7 +4143,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
}
if (bs->dirty_bitmap) {
- set_dirty_bitmap(bs, sector_num, nb_sectors, 0);
+ bdrv_reset_dirty(bs, sector_num, nb_sectors);
}
if (bs->drv->bdrv_co_discard) {
@@ -4331,22 +4301,20 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
return true;
}
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
{
int64_t bitmap_size;
- bs->dirty_count = 0;
- if (enable) {
- if (!bs->dirty_bitmap) {
- bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
- BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG - 1;
- bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
+ assert((granularity & (granularity - 1)) == 0);
- bs->dirty_bitmap = g_new0(unsigned long, bitmap_size);
- }
+ if (granularity) {
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(!bs->dirty_bitmap);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+ bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
} else {
if (bs->dirty_bitmap) {
- g_free(bs->dirty_bitmap);
+ hbitmap_free(bs->dirty_bitmap);
bs->dirty_bitmap = NULL;
}
}
@@ -4354,67 +4322,37 @@ void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
- int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- if (bs->dirty_bitmap &&
- (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
- return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] &
- (1UL << (chunk % BITS_PER_LONG)));
+ if (bs->dirty_bitmap) {
+ return hbitmap_get(bs->dirty_bitmap, sector);
} else {
return 0;
}
}
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
{
- int64_t chunk;
- int bit, elem;
-
- /* Avoid an infinite loop. */
- assert(bs->dirty_count > 0);
-
- sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
- chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
- elem = chunk / BITS_PER_LONG;
- bit = chunk % BITS_PER_LONG;
- for (;;) {
- if (sector >= bs->total_sectors) {
- sector = 0;
- bit = elem = 0;
- }
- if (bit == 0 && bs->dirty_bitmap[elem] == 0) {
- sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
- elem++;
- } else {
- if (bs->dirty_bitmap[elem] & (1UL << bit)) {
- return sector;
- }
- sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
- if (++bit == BITS_PER_LONG) {
- bit = 0;
- elem++;
- }
- }
- }
+ hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
- set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
+ hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
int nr_sectors)
{
- set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
+ hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
}
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
- return bs->dirty_count;
+ if (bs->dirty_bitmap) {
+ return hbitmap_count(bs->dirty_bitmap);
+ } else {
+ return 0;
+ }
}
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
diff --git a/block/bochs.c b/block/bochs.c
index 1b1d9cdbe5..37375834e9 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -126,7 +126,7 @@ static int bochs_open(BlockDriverState *bs, int flags)
strcmp(bochs.subtype, GROWING_TYPE) ||
((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
(le32_to_cpu(bochs.version) != HEADER_V1))) {
- goto fail;
+ return -EMEDIUMTYPE;
}
if (le32_to_cpu(bochs.version) == HEADER_V1) {
diff --git a/block/cow.c b/block/cow.c
index a33ce950d4..4baf9042fe 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -73,7 +73,7 @@ static int cow_open(BlockDriverState *bs, int flags)
}
if (be32_to_cpu(cow_header.magic) != COW_MAGIC) {
- ret = -EINVAL;
+ ret = -EMEDIUMTYPE;
goto fail;
}
diff --git a/block/mirror.c b/block/mirror.c
index 6180aa30e5..a62ad86c28 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -15,17 +15,17 @@
#include "block/blockjob.h"
#include "block/block_int.h"
#include "qemu/ratelimit.h"
+#include "qemu/bitmap.h"
-enum {
- /*
- * Size of data buffer for populating the image file. This should be large
- * enough to process multiple clusters in a single call, so that populating
- * contiguous regions of the image is efficient.
- */
- BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
-};
+#define SLICE_TIME 100000000ULL /* ns */
+#define MAX_IN_FLIGHT 16
-#define SLICE_TIME 100000000ULL /* ns */
+/* The mirroring buffer is a list of granularity-sized chunks.
+ * Free chunks are organized in a list.
+ */
+typedef struct MirrorBuffer {
+ QSIMPLEQ_ENTRY(MirrorBuffer) next;
+} MirrorBuffer;
typedef struct MirrorBlockJob {
BlockJob common;
@@ -36,9 +36,26 @@ typedef struct MirrorBlockJob {
bool synced;
bool should_complete;
int64_t sector_num;
+ int64_t granularity;
+ size_t buf_size;
+ unsigned long *cow_bitmap;
+ HBitmapIter hbi;
uint8_t *buf;
+ QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
+ int buf_free_count;
+
+ unsigned long *in_flight_bitmap;
+ int in_flight;
+ int ret;
} MirrorBlockJob;
+typedef struct MirrorOp {
+ MirrorBlockJob *s;
+ QEMUIOVector qiov;
+ int64_t sector_num;
+ int nb_sectors;
+} MirrorOp;
+
static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
int error)
{
@@ -52,51 +69,234 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
}
}
-static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
- BlockErrorAction *p_action)
+static void mirror_iteration_done(MirrorOp *op, int ret)
{
- BlockDriverState *source = s->common.bs;
- BlockDriverState *target = s->target;
- QEMUIOVector qiov;
- int ret, nb_sectors;
- int64_t end;
- struct iovec iov;
+ MirrorBlockJob *s = op->s;
+ struct iovec *iov;
+ int64_t chunk_num;
+ int i, nb_chunks, sectors_per_chunk;
+
+ trace_mirror_iteration_done(s, op->sector_num, op->nb_sectors, ret);
+
+ s->in_flight--;
+ iov = op->qiov.iov;
+ for (i = 0; i < op->qiov.niov; i++) {
+ MirrorBuffer *buf = (MirrorBuffer *) iov[i].iov_base;
+ QSIMPLEQ_INSERT_TAIL(&s->buf_free, buf, next);
+ s->buf_free_count++;
+ }
- end = s->common.len >> BDRV_SECTOR_BITS;
- s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
- nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
- bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+ sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ chunk_num = op->sector_num / sectors_per_chunk;
+ nb_chunks = op->nb_sectors / sectors_per_chunk;
+ bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
+ if (s->cow_bitmap && ret >= 0) {
+ bitmap_set(s->cow_bitmap, chunk_num, nb_chunks);
+ }
- /* Copy the dirty cluster. */
- iov.iov_base = s->buf;
- iov.iov_len = nb_sectors * 512;
- qemu_iovec_init_external(&qiov, &iov, 1);
+ g_slice_free(MirrorOp, op);
+ qemu_coroutine_enter(s->common.co, NULL);
+}
- trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
- ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+static void mirror_write_complete(void *opaque, int ret)
+{
+ MirrorOp *op = opaque;
+ MirrorBlockJob *s = op->s;
if (ret < 0) {
- *p_action = mirror_error_action(s, true, -ret);
- goto fail;
+ BlockDriverState *source = s->common.bs;
+ BlockErrorAction action;
+
+ bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+ action = mirror_error_action(s, false, -ret);
+ if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+ s->ret = ret;
+ }
}
- ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+ mirror_iteration_done(op, ret);
+}
+
+static void mirror_read_complete(void *opaque, int ret)
+{
+ MirrorOp *op = opaque;
+ MirrorBlockJob *s = op->s;
if (ret < 0) {
- *p_action = mirror_error_action(s, false, -ret);
- s->synced = false;
- goto fail;
+ BlockDriverState *source = s->common.bs;
+ BlockErrorAction action;
+
+ bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+ action = mirror_error_action(s, true, -ret);
+ if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+ s->ret = ret;
+ }
+
+ mirror_iteration_done(op, ret);
+ return;
+ }
+ bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
+ mirror_write_complete, op);
+}
+
+static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+ BlockDriverState *source = s->common.bs;
+ int nb_sectors, sectors_per_chunk, nb_chunks;
+ int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
+ MirrorOp *op;
+
+ s->sector_num = hbitmap_iter_next(&s->hbi);
+ if (s->sector_num < 0) {
+ bdrv_dirty_iter_init(source, &s->hbi);
+ s->sector_num = hbitmap_iter_next(&s->hbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
+ assert(s->sector_num >= 0);
+ }
+
+ hbitmap_next_sector = s->sector_num;
+ sector_num = s->sector_num;
+ sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ end = s->common.len >> BDRV_SECTOR_BITS;
+
+ /* Extend the QEMUIOVector to include all adjacent blocks that will
+ * be copied in this operation.
+ *
+ * We have to do this if we have no backing file yet in the destination,
+ * and the cluster size is very large. Then we need to do COW ourselves.
+ * The first time a cluster is copied, copy it entirely. Note that,
+ * because both the granularity and the cluster size are powers of two,
+ * the number of sectors to copy cannot exceed one cluster.
+ *
+ * We also want to extend the QEMUIOVector to include more adjacent
+ * dirty blocks if possible, to limit the number of I/O operations and
+ * run efficiently even with a small granularity.
+ */
+ nb_chunks = 0;
+ nb_sectors = 0;
+ next_sector = sector_num;
+ next_chunk = sector_num / sectors_per_chunk;
+
+ /* Wait for I/O to this cluster (from a previous iteration) to be done. */
+ while (test_bit(next_chunk, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ qemu_coroutine_yield();
+ }
+
+ do {
+ int added_sectors, added_chunks;
+
+ if (!bdrv_get_dirty(source, next_sector) ||
+ test_bit(next_chunk, s->in_flight_bitmap)) {
+ assert(nb_sectors > 0);
+ break;
+ }
+
+ added_sectors = sectors_per_chunk;
+ if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
+ bdrv_round_to_clusters(s->target,
+ next_sector, added_sectors,
+ &next_sector, &added_sectors);
+
+ /* On the first iteration, the rounding may make us copy
+ * sectors before the first dirty one.
+ */
+ if (next_sector < sector_num) {
+ assert(nb_sectors == 0);
+ sector_num = next_sector;
+ next_chunk = next_sector / sectors_per_chunk;
+ }
+ }
+
+ added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
+ added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+
+ /* When doing COW, it may happen that there is not enough space for
+ * a full cluster. Wait if that is the case.
+ */
+ while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
+ trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
+ qemu_coroutine_yield();
+ }
+ if (s->buf_free_count < nb_chunks + added_chunks) {
+ trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
+ break;
+ }
+
+ /* We have enough free space to copy these sectors. */
+ bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
+
+ nb_sectors += added_sectors;
+ nb_chunks += added_chunks;
+ next_sector += added_sectors;
+ next_chunk += added_chunks;
+ } while (next_sector < end);
+
+ /* Allocate a MirrorOp that is used as an AIO callback. */
+ op = g_slice_new(MirrorOp);
+ op->s = s;
+ op->sector_num = sector_num;
+ op->nb_sectors = nb_sectors;
+
+ /* Now make a QEMUIOVector taking enough granularity-sized chunks
+ * from s->buf_free.
+ */
+ qemu_iovec_init(&op->qiov, nb_chunks);
+ next_sector = sector_num;
+ while (nb_chunks-- > 0) {
+ MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+ QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
+ s->buf_free_count--;
+ qemu_iovec_add(&op->qiov, buf, s->granularity);
+
+ /* Advance the HBitmapIter in parallel, so that we do not examine
+ * the same sector twice.
+ */
+ if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+ hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+ }
+
+ next_sector += sectors_per_chunk;
}
- return 0;
-fail:
- /* Try again later. */
- bdrv_set_dirty(source, s->sector_num, nb_sectors);
- return ret;
+ bdrv_reset_dirty(source, sector_num, nb_sectors);
+
+ /* Copy the dirty cluster. */
+ s->in_flight++;
+ trace_mirror_one_iteration(s, sector_num, nb_sectors);
+ bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+ mirror_read_complete, op);
+}
+
+static void mirror_free_init(MirrorBlockJob *s)
+{
+ int granularity = s->granularity;
+ size_t buf_size = s->buf_size;
+ uint8_t *buf = s->buf;
+
+ assert(s->buf_free_count == 0);
+ QSIMPLEQ_INIT(&s->buf_free);
+ while (buf_size != 0) {
+ MirrorBuffer *cur = (MirrorBuffer *)buf;
+ QSIMPLEQ_INSERT_TAIL(&s->buf_free, cur, next);
+ s->buf_free_count++;
+ buf_size -= granularity;
+ buf += granularity;
+ }
+}
+
+static void mirror_drain(MirrorBlockJob *s)
+{
+ while (s->in_flight > 0) {
+ qemu_coroutine_yield();
+ }
}
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
BlockDriverState *bs = s->common.bs;
- int64_t sector_num, end;
+ int64_t sector_num, end, sectors_per_chunk, length;
+ uint64_t last_pause_ns;
+ BlockDriverInfo bdi;
+ char backing_filename[1024];
int ret = 0;
int n;
@@ -105,20 +305,39 @@ static void coroutine_fn mirror_run(void *opaque)
}
s->common.len = bdrv_getlength(bs);
- if (s->common.len < 0) {
+ if (s->common.len <= 0) {
block_job_completed(&s->common, s->common.len);
return;
}
+ length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
+ s->in_flight_bitmap = bitmap_new(length);
+
+ /* If we have no backing file yet in the destination, we cannot let
+ * the destination do COW. Instead, we copy sectors around the
+ * dirty data if needed. We need a bitmap to do that.
+ */
+ bdrv_get_backing_filename(s->target, backing_filename,
+ sizeof(backing_filename));
+ if (backing_filename[0] && !s->target->backing_hd) {
+ bdrv_get_info(s->target, &bdi);
+ if (s->granularity < bdi.cluster_size) {
+ s->buf_size = MAX(s->buf_size, bdi.cluster_size);
+ s->cow_bitmap = bitmap_new(length);
+ }
+ }
+
end = s->common.len >> BDRV_SECTOR_BITS;
- s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+ s->buf = qemu_blockalign(bs, s->buf_size);
+ sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ mirror_free_init(s);
if (s->mode != MIRROR_SYNC_MODE_NONE) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base;
base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
for (sector_num = 0; sector_num < end; ) {
- int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+ int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
ret = bdrv_co_is_allocated_above(bs, base,
sector_num, next - sector_num, &n);
@@ -136,24 +355,40 @@ static void coroutine_fn mirror_run(void *opaque)
}
}
- s->sector_num = -1;
+ bdrv_dirty_iter_init(bs, &s->hbi);
+ last_pause_ns = qemu_get_clock_ns(rt_clock);
for (;;) {
uint64_t delay_ns;
int64_t cnt;
bool should_complete;
+ if (s->ret < 0) {
+ ret = s->ret;
+ goto immediate_exit;
+ }
+
cnt = bdrv_get_dirty_count(bs);
- if (cnt != 0) {
- BlockErrorAction action = BDRV_ACTION_REPORT;
- ret = mirror_iteration(s, &action);
- if (ret < 0 && action == BDRV_ACTION_REPORT) {
- goto immediate_exit;
+
+ /* Note that even when no rate limit is applied we need to yield
+ * periodically with no pending I/O so that qemu_aio_flush() returns.
+ * We do so every SLICE_TIME nanoseconds, or when there is an error,
+ * or when the source is clean, whichever comes first.
+ */
+ if (qemu_get_clock_ns(rt_clock) - last_pause_ns < SLICE_TIME &&
+ s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+ (cnt == 0 && s->in_flight > 0)) {
+ trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
+ qemu_coroutine_yield();
+ continue;
+ } else if (cnt != 0) {
+ mirror_iteration(s);
+ continue;
}
- cnt = bdrv_get_dirty_count(bs);
}
should_complete = false;
- if (cnt == 0) {
+ if (s->in_flight == 0 && cnt == 0) {
trace_mirror_before_flush(s);
ret = bdrv_flush(s->target);
if (ret < 0) {
@@ -196,23 +431,20 @@ static void coroutine_fn mirror_run(void *opaque)
trace_mirror_before_sleep(s, cnt, s->synced);
if (!s->synced) {
/* Publish progress */
- s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+ s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
if (s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+ delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
} else {
delay_ns = 0;
}
- /* Note that even when no rate limit is applied we need to yield
- * with no pending I/O here so that bdrv_drain_all() returns.
- */
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
} else if (!should_complete) {
- delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+ delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
block_job_sleep_ns(&s->common, rt_clock, delay_ns);
} else if (cnt == 0) {
/* The two disks are in sync. Exit and report successful
@@ -222,11 +454,24 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
+ last_pause_ns = qemu_get_clock_ns(rt_clock);
}
immediate_exit:
+ if (s->in_flight > 0) {
+ /* We get here only if something went wrong. Either the job failed,
+ * or it was cancelled prematurely so that we do not guarantee that
+ * the target is a copy of the source.
+ */
+ assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
+ mirror_drain(s);
+ }
+
+ assert(s->in_flight == 0);
qemu_vfree(s->buf);
- bdrv_set_dirty_tracking(bs, false);
+ g_free(s->cow_bitmap);
+ g_free(s->in_flight_bitmap);
+ bdrv_set_dirty_tracking(bs, 0);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
@@ -288,14 +533,28 @@ static BlockJobType mirror_job_type = {
};
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, MirrorSyncMode mode,
- BlockdevOnError on_source_error,
+ int64_t speed, int64_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
{
MirrorBlockJob *s;
+ if (granularity == 0) {
+ /* Choose the default granularity based on the target file's cluster
+ * size, clamped between 4k and 64k. */
+ BlockDriverInfo bdi;
+ if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
+ granularity = MAX(4096, bdi.cluster_size);
+ granularity = MIN(65536, granularity);
+ } else {
+ granularity = 65536;
+ }
+ }
+
+ assert ((granularity & (granularity - 1)) == 0);
+
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
@@ -312,7 +571,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
s->on_target_error = on_target_error;
s->target = target;
s->mode = mode;
- bdrv_set_dirty_tracking(bs, true);
+ s->granularity = granularity;
+ s->buf_size = MAX(buf_size, granularity);
+
+ bdrv_set_dirty_tracking(bs, granularity);
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
diff --git a/block/qcow.c b/block/qcow.c
index 4276610afd..a7135eea47 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -112,7 +112,7 @@ static int qcow_open(BlockDriverState *bs, int flags)
be64_to_cpus(&header.l1_table_offset);
if (header.magic != QCOW_MAGIC) {
- ret = -EINVAL;
+ ret = -EMEDIUMTYPE;
goto fail;
}
if (header.version != QCOW_VERSION) {
diff --git a/block/qcow2.c b/block/qcow2.c
index f6abff6111..7610e569e3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,7 +311,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
be32_to_cpus(&header.nb_snapshots);
if (header.magic != QCOW_MAGIC) {
- ret = -EINVAL;
+ ret = -EMEDIUMTYPE;
goto fail;
}
if (header.version < 2 || header.version > 3) {
diff --git a/block/qed.c b/block/qed.c
index cf85d8f2b4..b8515e58b3 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -390,7 +390,7 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags)
qed_header_le_to_cpu(&le_header, &s->header);
if (s->header.magic != QED_MAGIC) {
- return -EINVAL;
+ return -EMEDIUMTYPE;
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
diff --git a/block/vdi.c b/block/vdi.c
index 021abaa227..257a592ea9 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -246,7 +246,7 @@ static void vdi_header_print(VdiHeader *header)
{
char uuid[37];
logout("text %s", header->text);
- logout("signature 0x%04x\n", header->signature);
+ logout("signature 0x%08x\n", header->signature);
logout("header size 0x%04x\n", header->header_size);
logout("image type 0x%04x\n", header->image_type);
logout("image flags 0x%04x\n", header->image_flags);
@@ -369,10 +369,12 @@ static int vdi_open(BlockDriverState *bs, int flags)
BDRVVdiState *s = bs->opaque;
VdiHeader header;
size_t bmap_size;
+ int ret;
logout("\n");
- if (bdrv_read(bs->file, 0, (uint8_t *)&header, 1) < 0) {
+ ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+ if (ret < 0) {
goto fail;
}
@@ -390,33 +392,45 @@ static int vdi_open(BlockDriverState *bs, int flags)
header.disk_size &= ~(SECTOR_SIZE - 1);
}
- if (header.version != VDI_VERSION_1_1) {
+ if (header.signature != VDI_SIGNATURE) {
+ logout("bad vdi signature %08x\n", header.signature);
+ ret = -EMEDIUMTYPE;
+ goto fail;
+ } else if (header.version != VDI_VERSION_1_1) {
logout("unsupported version %u.%u\n",
header.version >> 16, header.version & 0xffff);
+ ret = -ENOTSUP;
goto fail;
} else if (header.offset_bmap % SECTOR_SIZE != 0) {
/* We only support block maps which start on a sector boundary. */
logout("unsupported block map offset 0x%x B\n", header.offset_bmap);
+ ret = -ENOTSUP;
goto fail;
} else if (header.offset_data % SECTOR_SIZE != 0) {
/* We only support data blocks which start on a sector boundary. */
logout("unsupported data offset 0x%x B\n", header.offset_data);
+ ret = -ENOTSUP;
goto fail;
} else if (header.sector_size != SECTOR_SIZE) {
logout("unsupported sector size %u B\n", header.sector_size);
+ ret = -ENOTSUP;
goto fail;
} else if (header.block_size != 1 * MiB) {
logout("unsupported block size %u B\n", header.block_size);
+ ret = -ENOTSUP;
goto fail;
} else if (header.disk_size >
(uint64_t)header.blocks_in_image * header.block_size) {
logout("unsupported disk size %" PRIu64 " B\n", header.disk_size);
+ ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_link)) {
logout("link uuid != 0, unsupported\n");
+ ret = -ENOTSUP;
goto fail;
} else if (!uuid_is_null(header.uuid_parent)) {
logout("parent uuid != 0, unsupported\n");
+ ret = -ENOTSUP;
goto fail;
}
@@ -432,7 +446,8 @@ static int vdi_open(BlockDriverState *bs, int flags)
if (bmap_size > 0) {
s->bmap = g_malloc(bmap_size * SECTOR_SIZE);
}
- if (bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size) < 0) {
+ ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+ if (ret < 0) {
goto fail_free_bmap;
}
@@ -448,7 +463,7 @@ static int vdi_open(BlockDriverState *bs, int flags)
g_free(s->bmap);
fail:
- return -1;
+ return ret;
}
static int vdi_reopen_prepare(BDRVReopenState *state,
diff --git a/block/vmdk.c b/block/vmdk.c
index 19298c2a3e..8333afb5e3 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -616,7 +616,7 @@ static int vmdk_open_sparse(BlockDriverState *bs,
return vmdk_open_vmdk4(bs, file, flags);
break;
default:
- return -EINVAL;
+ return -EMEDIUMTYPE;
break;
}
}
@@ -718,7 +718,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
}
buf[2047] = '\0';
if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
- return -EINVAL;
+ return -EMEDIUMTYPE;
}
if (strcmp(ct, "monolithicFlat") &&
strcmp(ct, "twoGbMaxExtentSparse") &&
diff --git a/blockdev.c b/blockdev.c
index 9126587c45..63e6f1eafa 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -617,8 +617,13 @@ DriveInfo *drive_init(QemuOpts *opts, BlockInterfaceType block_default_type)
ret = bdrv_open(dinfo->bdrv, file, bdrv_flags, drv);
if (ret < 0) {
- error_report("could not open disk image %s: %s",
- file, strerror(-ret));
+ if (ret == -EMEDIUMTYPE) {
+ error_report("could not open disk image %s: not in %s format",
+ file, drv->format_name);
+ } else {
+ error_report("could not open disk image %s: %s",
+ file, strerror(-ret));
+ }
goto err;
}
@@ -642,21 +647,17 @@ void do_commit(Monitor *mon, const QDict *qdict)
if (!strcmp(device, "all")) {
ret = bdrv_commit_all();
- if (ret == -EBUSY) {
- qerror_report(QERR_DEVICE_IN_USE, device);
- return;
- }
} else {
bs = bdrv_find(device);
if (!bs) {
- qerror_report(QERR_DEVICE_NOT_FOUND, device);
+ monitor_printf(mon, "Device '%s' not found\n", device);
return;
}
ret = bdrv_commit(bs);
- if (ret == -EBUSY) {
- qerror_report(QERR_DEVICE_IN_USE, device);
- return;
- }
+ }
+ if (ret < 0) {
+ monitor_printf(mon, "'commit' error for '%s': %s\n", device,
+ strerror(-ret));
}
}
@@ -1188,16 +1189,19 @@ void qmp_block_commit(const char *device,
drive_get_ref(drive_get_by_blockdev(bs));
}
+#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
+
void qmp_drive_mirror(const char *device, const char *target,
bool has_format, const char *format,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
+ bool has_granularity, uint32_t granularity,
+ bool has_buf_size, int64_t buf_size,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
{
- BlockDriverInfo bdi;
BlockDriverState *bs;
BlockDriverState *source, *target_bs;
BlockDriver *proto_drv;
@@ -1219,6 +1223,21 @@ void qmp_drive_mirror(const char *device, const char *target,
if (!has_mode) {
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
+ if (!has_granularity) {
+ granularity = 0;
+ }
+ if (!has_buf_size) {
+ buf_size = DEFAULT_MIRROR_BUF_SIZE;
+ }
+
+ if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
+ error_set(errp, QERR_INVALID_PARAMETER, device);
+ return;
+ }
+ if (granularity & (granularity - 1)) {
+ error_set(errp, QERR_INVALID_PARAMETER, device);
+ return;
+ }
bs = bdrv_find(device);
if (!bs) {
@@ -1259,11 +1278,11 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
+ bdrv_get_geometry(bs, &size);
+ size *= 512;
if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
/* create new image w/o backing file */
assert(format && drv);
- bdrv_get_geometry(bs, &size);
- size *= 512;
bdrv_img_create(target, format,
NULL, NULL, NULL, size, flags, &local_err);
} else {
@@ -1276,7 +1295,7 @@ void qmp_drive_mirror(const char *device, const char *target,
bdrv_img_create(target, format,
source->filename,
source->drv->format_name,
- NULL, -1, flags, &local_err);
+ NULL, size, flags, &local_err);
break;
default:
abort();
@@ -1288,6 +1307,9 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
+ /* Mirroring takes care of copy-on-write using the source's backing
+ * file.
+ */
target_bs = bdrv_new("");
ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
@@ -1297,18 +1319,8 @@ void qmp_drive_mirror(const char *device, const char *target,
return;
}
- /* We need a backing file if we will copy parts of a cluster. */
- if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
- bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
- ret = bdrv_open_backing_file(target_bs);
- if (ret < 0) {
- bdrv_delete(target_bs);
- error_set(errp, QERR_OPEN_FILE_FAILED, target);
- return;
- }
- }
-
- mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+ mirror_start(bs, target_bs, speed, granularity, buf_size, sync,
+ on_source_error, on_target_error,
block_job_cb, bs, &local_err);
if (local_err != NULL) {
bdrv_delete(target_bs);
diff --git a/cpus.c b/cpus.c
index a4390c3c3f..41779eb02e 100644
--- a/cpus.c
+++ b/cpus.c
@@ -517,7 +517,7 @@ static void qemu_init_sigbus(void)
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
-static void qemu_kvm_eat_signals(CPUArchState *env)
+static void qemu_kvm_eat_signals(CPUState *cpu)
{
struct timespec ts = { 0, 0 };
siginfo_t siginfo;
@@ -538,7 +538,7 @@ static void qemu_kvm_eat_signals(CPUArchState *env)
switch (r) {
case SIGBUS:
- if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
+ if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
sigbus_reraise();
}
break;
@@ -560,7 +560,7 @@ static void qemu_init_sigbus(void)
{
}
-static void qemu_kvm_eat_signals(CPUArchState *env)
+static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */
@@ -727,7 +727,7 @@ static void qemu_kvm_wait_io_event(CPUArchState *env)
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
- qemu_kvm_eat_signals(env);
+ qemu_kvm_eat_signals(cpu);
qemu_wait_io_event_common(cpu);
}
diff --git a/docs/virtio-balloon-stats.txt b/docs/virtio-balloon-stats.txt
new file mode 100644
index 0000000000..f74612f468
--- /dev/null
+++ b/docs/virtio-balloon-stats.txt
@@ -0,0 +1,104 @@
+virtio balloon memory statistics
+================================
+
+The virtio balloon driver supports guest memory statistics reporting. These
+statistics are available to QEMU users as QOM (QEMU Object Model) device
+properties via a polling mechanism.
+
+Before querying the available stats, clients first have to enable polling.
+This is done by writing a time interval value (in seconds) to the
+guest-stats-polling-interval property. This value can be:
+
+ > 0 enables polling in the specified interval. If polling is already
+ enabled, the polling time interval is changed to the new value
+
+ 0 disables polling. Previous polled statistics are still valid and
+ can be queried.
+
+Once polling is enabled, the virtio-balloon device in QEMU will start
+polling the guest's balloon driver for new stats in the specified time
+interval.
+
+To retrieve those stats, clients have to query the guest-stats property,
+which will return a dictionary containing:
+
+ o A key named 'stats', containing all available stats. If the guest
+ doesn't support a particular stat, or if it couldn't be retrieved,
+ its value will be -1. Currently, the following stats are supported:
+
+ - stat-swap-in
+ - stat-swap-out
+ - stat-major-faults
+ - stat-minor-faults
+ - stat-free-memory
+ - stat-total-memory
+
+ o A key named last-update, which contains the last stats update
+ timestamp in seconds. Since this timestamp is generated by the host,
+ a buggy guest can't influence its value
+
+It's also important to note the following:
+
+ - Previously polled statistics remain available even if the polling is
+ later disabled
+
+ - As noted above, if a guest doesn't support a particular stat its value
+ will always be -1. However, it's also possible that a guest temporarily
+ couldn't update one or even all stats. If this happens, just wait for
+ the next update
+
+ - Polling can be enabled even if the guest doesn't have stats support
+ or the balloon driver wasn't loaded in the guest. If this is the case
+ and stats are queried, an error will be returned
+
+ - The polling timer is only re-armed when the guest responds to the
+ statistics request. This means that if a (buggy) guest doesn't ever
+ respond to the request the timer will never be re-armed, which has
+ the same effect as disabling polling
+
+Here are a few examples. QEMU is started with '-balloon virtio', which
+generates '/machine/peripheral-anon/device[1]' as the QOM path for the
+balloon device.
+
+Enable polling with 2 seconds interval:
+
+{ "execute": "qom-set",
+ "arguments": { "path": "/machine/peripheral-anon/device[1]",
+ "property": "guest-stats-polling-interval", "value": 2 } }
+
+{ "return": {} }
+
+Change polling to 10 seconds:
+
+{ "execute": "qom-set",
+ "arguments": { "path": "/machine/peripheral-anon/device[1]",
+ "property": "guest-stats-polling-interval", "value": 10 } }
+
+{ "return": {} }
+
+Get stats:
+
+{ "execute": "qom-get",
+ "arguments": { "path": "/machine/peripheral-anon/device[1]",
+ "property": "guest-stats" } }
+{
+ "return": {
+ "stats": {
+ "stat-swap-out": 0,
+ "stat-free-memory": 844943360,
+ "stat-minor-faults": 219028,
+ "stat-major-faults": 235,
+ "stat-total-memory": 1044406272,
+ "stat-swap-in": 0
+ },
+ "last-update": 1358529861
+ }
+}
+
+Disable polling:
+
+{ "execute": "qom-set",
+ "arguments": { "path": "/machine/peripheral-anon/device[1]",
+ "property": "stats-polling-interval", "value": 0 } }
+
+{ "return": {} }
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0934b9b915..bdd48f3469 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -837,6 +837,45 @@ STEXI
@item nmi @var{cpu}
@findex nmi
Inject an NMI on the given CPU (x86 only).
+
+ETEXI
+
+ {
+ .name = "memchar_write",
+ .args_type = "device:s,data:s",
+ .params = "device data",
+ .help = "Provide writing interface for CirMemCharDriver. Write"
+ "'data' to it.",
+ .mhandler.cmd = hmp_memchar_write,
+ },
+
+STEXI
+@item memchar_write @var{device} @var{data}
+@findex memchar_write
+Provide writing interface for CirMemCharDriver. Write @var{data}
+to char device 'memory'.
+
+ETEXI
+
+ {
+ .name = "memchar_read",
+ .args_type = "device:s,size:i",
+ .params = "device size",
+ .help = "Provide read interface for CirMemCharDriver. Read from"
+ "it and return the data with size.",
+ .mhandler.cmd = hmp_memchar_read,
+ },
+
+STEXI
+@item memchar_read @var{device}
+@findex memchar_read
+Provide read interface for CirMemCharDriver. Read from char device
+'memory' and return the data.
+
+@var{size} is the size of data want to read from. Refer to unencoded
+size of the raw data, would adjust to the init size of the memchar
+if the requested size is larger than it.
+
ETEXI
{
diff --git a/hmp.c b/hmp.c
index c7b6ba02fc..249b89b7e3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -465,29 +465,7 @@ void hmp_info_balloon(Monitor *mon, const QDict *qdict)
return;
}
- monitor_printf(mon, "balloon: actual=%" PRId64, info->actual >> 20);
- if (info->has_mem_swapped_in) {
- monitor_printf(mon, " mem_swapped_in=%" PRId64, info->mem_swapped_in);
- }
- if (info->has_mem_swapped_out) {
- monitor_printf(mon, " mem_swapped_out=%" PRId64, info->mem_swapped_out);
- }
- if (info->has_major_page_faults) {
- monitor_printf(mon, " major_page_faults=%" PRId64,
- info->major_page_faults);
- }
- if (info->has_minor_page_faults) {
- monitor_printf(mon, " minor_page_faults=%" PRId64,
- info->minor_page_faults);
- }
- if (info->has_free_mem) {
- monitor_printf(mon, " free_mem=%" PRId64, info->free_mem);
- }
- if (info->has_total_mem) {
- monitor_printf(mon, " total_mem=%" PRId64, info->total_mem);
- }
-
- monitor_printf(mon, "\n");
+ monitor_printf(mon, "balloon: actual=%" PRId64 "\n", info->actual >> 20);
qapi_free_BalloonInfo(info);
}
@@ -684,6 +662,40 @@ void hmp_pmemsave(Monitor *mon, const QDict *qdict)
hmp_handle_error(mon, &errp);
}
+void hmp_memchar_write(Monitor *mon, const QDict *qdict)
+{
+ uint32_t size;
+ const char *chardev = qdict_get_str(qdict, "device");
+ const char *data = qdict_get_str(qdict, "data");
+ Error *errp = NULL;
+
+ size = strlen(data);
+ qmp_memchar_write(chardev, size, data, false, 0, &errp);
+
+ hmp_handle_error(mon, &errp);
+}
+
+void hmp_memchar_read(Monitor *mon, const QDict *qdict)
+{
+ uint32_t size = qdict_get_int(qdict, "size");
+ const char *chardev = qdict_get_str(qdict, "device");
+ MemCharRead *meminfo;
+ Error *errp = NULL;
+
+ meminfo = qmp_memchar_read(chardev, size, false, 0, &errp);
+ if (errp) {
+ monitor_printf(mon, "%s\n", error_get_pretty(errp));
+ error_free(errp);
+ return;
+ }
+
+ if (meminfo->count > 0) {
+ monitor_printf(mon, "%s\n", meminfo->data);
+ }
+
+ qapi_free_MemCharRead(meminfo);
+}
+
static void hmp_cont_cb(void *opaque, int err)
{
if (!err) {
@@ -796,7 +808,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
qmp_drive_mirror(device, filename, !!format, format,
full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
- true, mode, false, 0,
+ true, mode, false, 0, false, 0, false, 0,
false, 0, false, 0, &errp);
hmp_handle_error(mon, &errp);
}
diff --git a/hmp.h b/hmp.h
index 44be683fcc..076d8cf378 100644
--- a/hmp.h
+++ b/hmp.h
@@ -43,6 +43,8 @@ void hmp_system_powerdown(Monitor *mon, const QDict *qdict);
void hmp_cpu(Monitor *mon, const QDict *qdict);
void hmp_memsave(Monitor *mon, const QDict *qdict);
void hmp_pmemsave(Monitor *mon, const QDict *qdict);
+void hmp_memchar_write(Monitor *mon, const QDict *qdict);
+void hmp_memchar_read(Monitor *mon, const QDict *qdict);
void hmp_cont(Monitor *mon, const QDict *qdict);
void hmp_system_wakeup(Monitor *mon, const QDict *qdict);
void hmp_inject_nmi(Monitor *mon, const QDict *qdict);
diff --git a/hw/boards.h b/hw/boards.h
index 3ff9665b1f..3813d4e551 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -33,6 +33,7 @@ typedef struct QEMUMachine {
unsigned int no_serial:1,
no_parallel:1,
use_virtcon:1,
+ use_sclp:1,
no_floppy:1,
no_cdrom:1,
no_sdcard:1;
diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index bdcd836986..02618f2480 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -504,7 +504,6 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
fw_cfg_add_bytes(s, FW_CFG_UUID, qemu_uuid, 16);
fw_cfg_add_i16(s, FW_CFG_NOGRAPHIC, (uint16_t)(display_type == DT_NOGRAPHIC));
fw_cfg_add_i16(s, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
- fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
fw_cfg_bootsplash(s);
fw_cfg_reboot(s);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 21f50ea5be..ad0094f532 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -241,7 +241,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
if ((pr->cmd & PORT_CMD_FIS_ON) &&
!s->dev[port].init_d2h_sent) {
ahci_init_d2h(&s->dev[port]);
- s->dev[port].init_d2h_sent = 1;
+ s->dev[port].init_d2h_sent = true;
}
check_cmd(s, port);
@@ -494,7 +494,7 @@ static void ahci_reset_port(AHCIState *s, int port)
pr->scr_err = 0;
pr->scr_act = 0;
d->busy_slot = -1;
- d->init_d2h_sent = 0;
+ d->init_d2h_sent = false;
ide_state = &s->dev[port].port.ifs[0];
if (!ide_state->bs) {
@@ -946,7 +946,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
ide_state->hcyl = 0xeb;
debug_print_fis(ide_state->io_buffer, 0x10);
ide_state->feature = IDE_FEATURE_DMA;
- s->dev[port].done_atapi_packet = 0;
+ s->dev[port].done_atapi_packet = false;
/* XXX send PIO setup FIS */
}
@@ -991,7 +991,7 @@ static int ahci_start_transfer(IDEDMA *dma)
if (is_atapi && !ad->done_atapi_packet) {
/* already prepopulated iobuffer */
- ad->done_atapi_packet = 1;
+ ad->done_atapi_packet = true;
goto out;
}
@@ -1035,11 +1035,10 @@ out:
static void ahci_start_dma(IDEDMA *dma, IDEState *s,
BlockDriverCompletionFunc *dma_cb)
{
+#ifdef DEBUG_AHCI
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
-
+#endif
DPRINTF(ad->port_no, "\n");
- ad->dma_cb = dma_cb;
- ad->dma_status |= BM_STATUS_DMAING;
s->io_buffer_offset = 0;
dma_cb(s, 0);
}
@@ -1095,7 +1094,6 @@ static int ahci_dma_set_unit(IDEDMA *dma, int unit)
static int ahci_dma_add_status(IDEDMA *dma, int status)
{
AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
- ad->dma_status |= status;
DPRINTF(ad->port_no, "set status: %x\n", status);
if (status & BM_STATUS_INT) {
@@ -1114,8 +1112,6 @@ static int ahci_dma_set_inactive(IDEDMA *dma)
/* update d2h status */
ahci_write_fis_d2h(ad, NULL);
- ad->dma_cb = NULL;
-
if (!ad->check_bh) {
/* maybe we still have something to process, check later */
ad->check_bh = qemu_bh_new(ahci_check_cmd_bh, ad);
@@ -1203,6 +1199,82 @@ void ahci_reset(AHCIState *s)
}
}
+static const VMStateDescription vmstate_ahci_device = {
+ .name = "ahci port",
+ .version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_IDE_BUS(port, AHCIDevice),
+ VMSTATE_UINT32(port_state, AHCIDevice),
+ VMSTATE_UINT32(finished, AHCIDevice),
+ VMSTATE_UINT32(port_regs.lst_addr, AHCIDevice),
+ VMSTATE_UINT32(port_regs.lst_addr_hi, AHCIDevice),
+ VMSTATE_UINT32(port_regs.fis_addr, AHCIDevice),
+ VMSTATE_UINT32(port_regs.fis_addr_hi, AHCIDevice),
+ VMSTATE_UINT32(port_regs.irq_stat, AHCIDevice),
+ VMSTATE_UINT32(port_regs.irq_mask, AHCIDevice),
+ VMSTATE_UINT32(port_regs.cmd, AHCIDevice),
+ VMSTATE_UINT32(port_regs.tfdata, AHCIDevice),
+ VMSTATE_UINT32(port_regs.sig, AHCIDevice),
+ VMSTATE_UINT32(port_regs.scr_stat, AHCIDevice),
+ VMSTATE_UINT32(port_regs.scr_ctl, AHCIDevice),
+ VMSTATE_UINT32(port_regs.scr_err, AHCIDevice),
+ VMSTATE_UINT32(port_regs.scr_act, AHCIDevice),
+ VMSTATE_UINT32(port_regs.cmd_issue, AHCIDevice),
+ VMSTATE_BOOL(done_atapi_packet, AHCIDevice),
+ VMSTATE_INT32(busy_slot, AHCIDevice),
+ VMSTATE_BOOL(init_d2h_sent, AHCIDevice),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static int ahci_state_post_load(void *opaque, int version_id)
+{
+ int i;
+ struct AHCIDevice *ad;
+ AHCIState *s = opaque;
+
+ for (i = 0; i < s->ports; i++) {
+ ad = &s->dev[i];
+ AHCIPortRegs *pr = &ad->port_regs;
+
+ map_page(&ad->lst,
+ ((uint64_t)pr->lst_addr_hi << 32) | pr->lst_addr, 1024);
+ map_page(&ad->res_fis,
+ ((uint64_t)pr->fis_addr_hi << 32) | pr->fis_addr, 256);
+ /*
+ * All pending i/o should be flushed out on a migrate. However,
+ * we might not have cleared the busy_slot since this is done
+ * in a bh. Also, issue i/o against any slots that are pending.
+ */
+ if ((ad->busy_slot != -1) &&
+ !(ad->port.ifs[0].status & (BUSY_STAT|DRQ_STAT))) {
+ pr->cmd_issue &= ~(1 << ad->busy_slot);
+ ad->busy_slot = -1;
+ }
+ check_cmd(s, i);
+ }
+
+ return 0;
+}
+
+const VMStateDescription vmstate_ahci = {
+ .name = "ahci",
+ .version_id = 1,
+ .post_load = ahci_state_post_load,
+ .fields = (VMStateField []) {
+ VMSTATE_STRUCT_VARRAY_POINTER_INT32(dev, AHCIState, ports,
+ vmstate_ahci_device, AHCIDevice),
+ VMSTATE_UINT32(control_regs.cap, AHCIState),
+ VMSTATE_UINT32(control_regs.ghc, AHCIState),
+ VMSTATE_UINT32(control_regs.irqstatus, AHCIState),
+ VMSTATE_UINT32(control_regs.impl, AHCIState),
+ VMSTATE_UINT32(control_regs.version, AHCIState),
+ VMSTATE_UINT32(idp_index, AHCIState),
+ VMSTATE_INT32(ports, AHCIState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
typedef struct SysbusAHCIState {
SysBusDevice busdev;
AHCIState ahci;
@@ -1211,7 +1283,11 @@ typedef struct SysbusAHCIState {
static const VMStateDescription vmstate_sysbus_ahci = {
.name = "sysbus-ahci",
- .unmigratable = 1,
+ .unmigratable = 1, /* Still buggy under I/O load */
+ .fields = (VMStateField []) {
+ VMSTATE_AHCI(ahci, AHCIPCIState),
+ VMSTATE_END_OF_LIST()
+ },
};
static void sysbus_ahci_reset(DeviceState *dev)
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 1200a56ada..85f37fe99d 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -281,11 +281,9 @@ struct AHCIDevice {
QEMUBH *check_bh;
uint8_t *lst;
uint8_t *res_fis;
- int dma_status;
- int done_atapi_packet;
- int busy_slot;
- int init_d2h_sent;
- BlockDriverCompletionFunc *dma_cb;
+ bool done_atapi_packet;
+ int32_t busy_slot;
+ bool init_d2h_sent;
AHCICmdHdr *cur_cmd;
NCQTransferState ncq_tfs[AHCI_MAX_CMDS];
};
@@ -297,7 +295,7 @@ typedef struct AHCIState {
MemoryRegion idp; /* Index-Data Pair I/O port space */
unsigned idp_offset; /* Offset of index in I/O port space */
uint32_t idp_index; /* Current IDP index */
- int ports;
+ int32_t ports;
qemu_irq irq;
DMAContext *dma;
} AHCIState;
@@ -307,6 +305,16 @@ typedef struct AHCIPCIState {
AHCIState ahci;
} AHCIPCIState;
+extern const VMStateDescription vmstate_ahci;
+
+#define VMSTATE_AHCI(_field, _state) { \
+ .name = (stringify(_field)), \
+ .size = sizeof(AHCIState), \
+ .vmsd = &vmstate_ahci, \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, AHCIState), \
+}
+
typedef struct NCQFrame {
uint8_t fis_type;
uint8_t c;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index 14ad0799c3..3743dc3b55 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -1149,8 +1149,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
}
ide_set_irq(s->bus);
break;
+
case WIN_VERIFY_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_VERIFY:
case WIN_VERIFY_ONCE:
/* do sector number check ? */
@@ -1158,8 +1160,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
s->status = READY_STAT | SEEK_STAT;
ide_set_irq(s->bus);
break;
+
case WIN_READ_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_READ:
case WIN_READ_ONCE:
if (s->drive_kind == IDE_CD) {
@@ -1173,8 +1177,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
s->req_nb_sectors = 1;
ide_sector_read(s);
break;
+
case WIN_WRITE_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_WRITE:
case WIN_WRITE_ONCE:
case CFA_WRITE_SECT_WO_ERASE:
@@ -1189,8 +1195,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
ide_transfer_start(s, s->io_buffer, 512, ide_sector_write);
s->media_changed = 1;
break;
+
case WIN_MULTREAD_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_MULTREAD:
if (!s->bs) {
goto abort_cmd;
@@ -1202,8 +1210,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
s->req_nb_sectors = s->mult_sectors;
ide_sector_read(s);
break;
+
case WIN_MULTWRITE_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_MULTWRITE:
case CFA_WRITE_MULTI_WO_ERASE:
if (!s->bs) {
@@ -1222,8 +1232,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
ide_transfer_start(s, s->io_buffer, 512 * n, ide_sector_write);
s->media_changed = 1;
break;
+
case WIN_READDMA_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_READDMA:
case WIN_READDMA_ONCE:
if (!s->bs) {
@@ -1232,8 +1244,10 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
ide_cmd_lba48_transform(s, lba48);
ide_sector_start_dma(s, IDE_DMA_READ);
break;
+
case WIN_WRITEDMA_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_WRITEDMA:
case WIN_WRITEDMA_ONCE:
if (!s->bs) {
@@ -1243,14 +1257,17 @@ void ide_exec_cmd(IDEBus *bus, uint32_t val)
ide_sector_start_dma(s, IDE_DMA_WRITE);
s->media_changed = 1;
break;
+
case WIN_READ_NATIVE_MAX_EXT:
- lba48 = 1;
+ lba48 = 1;
+ /* fall through */
case WIN_READ_NATIVE_MAX:
ide_cmd_lba48_transform(s, lba48);
ide_set_sector(s, s->nb_sectors - 1);
s->status = READY_STAT | SEEK_STAT;
ide_set_irq(s->bus);
break;
+
case WIN_CHECKPOWERMODE1:
case WIN_CHECKPOWERMODE2:
s->error = 0;
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 1fb803d340..cc30adc701 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -79,9 +79,15 @@
#define ICH9_IDP_INDEX 0x10
#define ICH9_IDP_INDEX_LOG2 0x04
-static const VMStateDescription vmstate_ahci = {
- .name = "ahci",
- .unmigratable = 1,
+static const VMStateDescription vmstate_ich9_ahci = {
+ .name = "ich9_ahci",
+ .unmigratable = 1, /* Still buggy under I/O load */
+ .version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_PCI_DEVICE(card, AHCIPCIState),
+ VMSTATE_AHCI(ahci, AHCIPCIState),
+ VMSTATE_END_OF_LIST()
+ },
};
static void pci_ich9_reset(DeviceState *dev)
@@ -152,7 +158,7 @@ static void ich_ahci_class_init(ObjectClass *klass, void *data)
k->device_id = PCI_DEVICE_ID_INTEL_82801IR;
k->revision = 0x02;
k->class_id = PCI_CLASS_STORAGE_SATA;
- dc->vmsd = &vmstate_ahci;
+ dc->vmsd = &vmstate_ich9_ahci;
dc->reset = pci_ich9_reset;
}
diff --git a/hw/m25p80.c b/hw/m25p80.c
index d39265632b..788c19608c 100644
--- a/hw/m25p80.c
+++ b/hw/m25p80.c
@@ -358,6 +358,8 @@ static void complete_collecting_data(Flash *s)
s->cur_addr |= s->data[1] << 8;
s->cur_addr |= s->data[2];
+ s->state = STATE_IDLE;
+
switch (s->cmd_in_progress) {
case DPP:
case QPP:
diff --git a/hw/pc.c b/hw/pc.c
index 780b1e4743..34b6dff686 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -551,6 +551,18 @@ int e820_add_entry(uint64_t address, uint64_t length, uint32_t type)
return index;
}
+/* Calculates the limit to CPU APIC ID values
+ *
+ * This function returns the limit for the APIC ID value, so that all
+ * CPU APIC IDs are < pc_apic_id_limit().
+ *
+ * This is used for FW_CFG_MAX_CPUS. See comments on bochs_bios_init().
+ */
+static unsigned int pc_apic_id_limit(unsigned int max_cpus)
+{
+ return x86_cpu_apic_id_from_index(max_cpus - 1) + 1;
+}
+
static void *bochs_bios_init(void)
{
void *fw_cfg;
@@ -558,9 +570,24 @@ static void *bochs_bios_init(void)
size_t smbios_len;
uint64_t *numa_fw_cfg;
int i, j;
+ unsigned int apic_id_limit = pc_apic_id_limit(max_cpus);
fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
-
+ /* FW_CFG_MAX_CPUS is a bit confusing/problematic on x86:
+ *
+ * SeaBIOS needs FW_CFG_MAX_CPUS for CPU hotplug, but the CPU hotplug
+ * QEMU<->SeaBIOS interface is not based on the "CPU index", but on the APIC
+ * ID of hotplugged CPUs[1]. This means that FW_CFG_MAX_CPUS is not the
+ * "maximum number of CPUs", but the "limit to the APIC ID values SeaBIOS
+ * may see".
+ *
+ * So, this means we must not use max_cpus, here, but the maximum possible
+ * APIC ID value, plus one.
+ *
+ * [1] The only kind of "CPU identifier" used between SeaBIOS and QEMU is
+ * the APIC ID, not the "CPU index"
+ */
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)apic_id_limit);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_bytes(fw_cfg, FW_CFG_ACPI_TABLES,
@@ -579,21 +606,24 @@ static void *bochs_bios_init(void)
* of nodes, one word for each VCPU->node and one word for each node to
* hold the amount of memory.
*/
- numa_fw_cfg = g_new0(uint64_t, 1 + max_cpus + nb_numa_nodes);
+ numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes);
numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes);
for (i = 0; i < max_cpus; i++) {
+ unsigned int apic_id = x86_cpu_apic_id_from_index(i);
+ assert(apic_id < apic_id_limit);
for (j = 0; j < nb_numa_nodes; j++) {
if (test_bit(i, node_cpumask[j])) {
- numa_fw_cfg[i + 1] = cpu_to_le64(j);
+ numa_fw_cfg[apic_id + 1] = cpu_to_le64(j);
break;
}
}
}
for (i = 0; i < nb_numa_nodes; i++) {
- numa_fw_cfg[max_cpus + 1 + i] = cpu_to_le64(node_mem[i]);
+ numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]);
}
fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg,
- (1 + max_cpus + nb_numa_nodes) * sizeof(*numa_fw_cfg));
+ (1 + apic_id_limit + nb_numa_nodes) *
+ sizeof(*numa_fw_cfg));
return fw_cfg;
}
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 0a6923dcef..b9a9b2efe1 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -235,10 +235,18 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
{
- enable_kvm_pv_eoi();
+ enable_compat_apic_id_mode();
pc_init_pci(args);
}
+/* PC machine init function for pc-0.14 to pc-1.2 */
+static void pc_init_pci_1_2(QEMUMachineInitArgs *args)
+{
+ disable_kvm_pv_eoi();
+ pc_init_pci_1_3(args);
+}
+
+/* PC init function for pc-0.10 to pc-0.13, and reused by xenfv */
static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
{
ram_addr_t ram_size = args->ram_size;
@@ -247,6 +255,8 @@ static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
const char *kernel_cmdline = args->kernel_cmdline;
const char *initrd_filename = args->initrd_filename;
const char *boot_device = args->boot_device;
+ disable_kvm_pv_eoi();
+ enable_compat_apic_id_mode();
pc_init1(get_system_memory(),
get_system_io(),
ram_size, boot_device,
@@ -264,6 +274,8 @@ static void pc_init_isa(QEMUMachineInitArgs *args)
const char *boot_device = args->boot_device;
if (cpu_model == NULL)
cpu_model = "486";
+ disable_kvm_pv_eoi();
+ enable_compat_apic_id_mode();
pc_init1(get_system_memory(),
get_system_io(),
ram_size, boot_device,
@@ -286,7 +298,7 @@ static QEMUMachine pc_i440fx_machine_v1_4 = {
.name = "pc-i440fx-1.4",
.alias = "pc",
.desc = "Standard PC (i440FX + PIIX, 1996)",
- .init = pc_init_pci_1_3,
+ .init = pc_init_pci,
.max_cpus = 255,
.is_default = 1,
DEFAULT_MACHINE_OPTIONS,
@@ -342,7 +354,7 @@ static QEMUMachine pc_machine_v1_3 = {
static QEMUMachine pc_machine_v1_2 = {
.name = "pc-1.2",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_1_2,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
PC_COMPAT_1_2,
@@ -386,7 +398,7 @@ static QEMUMachine pc_machine_v1_2 = {
static QEMUMachine pc_machine_v1_1 = {
.name = "pc-1.1",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_1_2,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
PC_COMPAT_1_1,
@@ -422,7 +434,7 @@ static QEMUMachine pc_machine_v1_1 = {
static QEMUMachine pc_machine_v1_0 = {
.name = "pc-1.0",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_1_2,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
PC_COMPAT_1_0,
@@ -438,7 +450,7 @@ static QEMUMachine pc_machine_v1_0 = {
static QEMUMachine pc_machine_v0_15 = {
.name = "pc-0.15",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_1_2,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
PC_COMPAT_0_15,
@@ -471,7 +483,7 @@ static QEMUMachine pc_machine_v0_15 = {
static QEMUMachine pc_machine_v0_14 = {
.name = "pc-0.14",
.desc = "Standard PC",
- .init = pc_init_pci,
+ .init = pc_init_pci_1_2,
.max_cpus = 255,
.compat_props = (GlobalProperty[]) {
PC_COMPAT_0_14,
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 6de810bde1..065ea871b3 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -413,6 +413,7 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
/* No PCI init: the BIOS will do it */
fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 9ed303a5e5..2778e45879 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -299,6 +299,7 @@ static void ppc_heathrow_init(QEMUMachineInitArgs *args)
/* No PCI init: the BIOS will do it */
fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW);
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 1b40c2e66e..9f2f41989c 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -1,8 +1,9 @@
obj-y = s390-virtio-bus.o s390-virtio.o
-
-obj-y := $(addprefix ../,$(obj-y))
obj-y += s390-virtio-hcall.o
obj-y += sclp.o
obj-y += event-facility.o
obj-y += sclpquiesce.o sclpconsole.o
obj-y += ipl.o
+obj-y += css.o
+obj-y += s390-virtio-ccw.o
+obj-y += virtio-ccw.o
diff --git a/hw/s390x/css.c b/hw/s390x/css.c
new file mode 100644
index 0000000000..3244201fc7
--- /dev/null
+++ b/hw/s390x/css.c
@@ -0,0 +1,1277 @@
+/*
+ * Channel subsystem base support.
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <hw/qdev.h>
+#include "qemu/bitops.h"
+#include "cpu.h"
+#include "ioinst.h"
+#include "css.h"
+#include "trace.h"
+
+typedef struct CrwContainer {
+ CRW crw;
+ QTAILQ_ENTRY(CrwContainer) sibling;
+} CrwContainer;
+
+typedef struct ChpInfo {
+ uint8_t in_use;
+ uint8_t type;
+ uint8_t is_virtual;
+} ChpInfo;
+
+typedef struct SubchSet {
+ SubchDev *sch[MAX_SCHID + 1];
+ unsigned long schids_used[BITS_TO_LONGS(MAX_SCHID + 1)];
+ unsigned long devnos_used[BITS_TO_LONGS(MAX_SCHID + 1)];
+} SubchSet;
+
+typedef struct CssImage {
+ SubchSet *sch_set[MAX_SSID + 1];
+ ChpInfo chpids[MAX_CHPID + 1];
+} CssImage;
+
+typedef struct ChannelSubSys {
+ QTAILQ_HEAD(, CrwContainer) pending_crws;
+ bool do_crw_mchk;
+ bool crws_lost;
+ uint8_t max_cssid;
+ uint8_t max_ssid;
+ bool chnmon_active;
+ uint64_t chnmon_area;
+ CssImage *css[MAX_CSSID + 1];
+ uint8_t default_cssid;
+} ChannelSubSys;
+
+static ChannelSubSys *channel_subsys;
+
+int css_create_css_image(uint8_t cssid, bool default_image)
+{
+ trace_css_new_image(cssid, default_image ? "(default)" : "");
+ if (cssid > MAX_CSSID) {
+ return -EINVAL;
+ }
+ if (channel_subsys->css[cssid]) {
+ return -EBUSY;
+ }
+ channel_subsys->css[cssid] = g_malloc0(sizeof(CssImage));
+ if (default_image) {
+ channel_subsys->default_cssid = cssid;
+ }
+ return 0;
+}
+
+static uint16_t css_build_subchannel_id(SubchDev *sch)
+{
+ if (channel_subsys->max_cssid > 0) {
+ return (sch->cssid << 8) | (1 << 3) | (sch->ssid << 1) | 1;
+ }
+ return (sch->ssid << 1) | 1;
+}
+
+static void css_inject_io_interrupt(SubchDev *sch)
+{
+ S390CPU *cpu = s390_cpu_addr2state(0);
+ uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11;
+
+ trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid,
+ sch->curr_status.pmcw.intparm, isc, "");
+ s390_io_interrupt(cpu,
+ css_build_subchannel_id(sch),
+ sch->schid,
+ sch->curr_status.pmcw.intparm,
+ (0x80 >> isc) << 24);
+}
+
+void css_conditional_io_interrupt(SubchDev *sch)
+{
+ /*
+ * If the subchannel is not currently status pending, make it pending
+ * with alert status.
+ */
+ if (!(sch->curr_status.scsw.ctrl & SCSW_STCTL_STATUS_PEND)) {
+ S390CPU *cpu = s390_cpu_addr2state(0);
+ uint8_t isc = (sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_ISC) >> 11;
+
+ trace_css_io_interrupt(sch->cssid, sch->ssid, sch->schid,
+ sch->curr_status.pmcw.intparm, isc,
+ "(unsolicited)");
+ sch->curr_status.scsw.ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ sch->curr_status.scsw.ctrl |=
+ SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+ /* Inject an I/O interrupt. */
+ s390_io_interrupt(cpu,
+ css_build_subchannel_id(sch),
+ sch->schid,
+ sch->curr_status.pmcw.intparm,
+ (0x80 >> isc) << 24);
+ }
+}
+
+static void sch_handle_clear_func(SubchDev *sch)
+{
+ PMCW *p = &sch->curr_status.pmcw;
+ SCSW *s = &sch->curr_status.scsw;
+ int path;
+
+ /* Path management: In our simple css, we always choose the only path. */
+ path = 0x80;
+
+ /* Reset values prior to 'issueing the clear signal'. */
+ p->lpum = 0;
+ p->pom = 0xff;
+ s->flags &= ~SCSW_FLAGS_MASK_PNO;
+
+ /* We always 'attempt to issue the clear signal', and we always succeed. */
+ sch->orb = NULL;
+ sch->channel_prog = 0x0;
+ sch->last_cmd_valid = false;
+ s->ctrl &= ~SCSW_ACTL_CLEAR_PEND;
+ s->ctrl |= SCSW_STCTL_STATUS_PEND;
+
+ s->dstat = 0;
+ s->cstat = 0;
+ p->lpum = path;
+
+}
+
+static void sch_handle_halt_func(SubchDev *sch)
+{
+
+ PMCW *p = &sch->curr_status.pmcw;
+ SCSW *s = &sch->curr_status.scsw;
+ int path;
+
+ /* Path management: In our simple css, we always choose the only path. */
+ path = 0x80;
+
+ /* We always 'attempt to issue the halt signal', and we always succeed. */
+ sch->orb = NULL;
+ sch->channel_prog = 0x0;
+ sch->last_cmd_valid = false;
+ s->ctrl &= ~SCSW_ACTL_HALT_PEND;
+ s->ctrl |= SCSW_STCTL_STATUS_PEND;
+
+ if ((s->ctrl & (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) ||
+ !((s->ctrl & SCSW_ACTL_START_PEND) ||
+ (s->ctrl & SCSW_ACTL_SUSP))) {
+ s->dstat = SCSW_DSTAT_DEVICE_END;
+ }
+ s->cstat = 0;
+ p->lpum = path;
+
+}
+
+static void copy_sense_id_to_guest(SenseId *dest, SenseId *src)
+{
+ int i;
+
+ dest->reserved = src->reserved;
+ dest->cu_type = cpu_to_be16(src->cu_type);
+ dest->cu_model = src->cu_model;
+ dest->dev_type = cpu_to_be16(src->dev_type);
+ dest->dev_model = src->dev_model;
+ dest->unused = src->unused;
+ for (i = 0; i < ARRAY_SIZE(dest->ciw); i++) {
+ dest->ciw[i].type = src->ciw[i].type;
+ dest->ciw[i].command = src->ciw[i].command;
+ dest->ciw[i].count = cpu_to_be16(src->ciw[i].count);
+ }
+}
+
+static CCW1 copy_ccw_from_guest(hwaddr addr)
+{
+ CCW1 tmp;
+ CCW1 ret;
+
+ cpu_physical_memory_read(addr, &tmp, sizeof(tmp));
+ ret.cmd_code = tmp.cmd_code;
+ ret.flags = tmp.flags;
+ ret.count = be16_to_cpu(tmp.count);
+ ret.cda = be32_to_cpu(tmp.cda);
+
+ return ret;
+}
+
+static int css_interpret_ccw(SubchDev *sch, hwaddr ccw_addr)
+{
+ int ret;
+ bool check_len;
+ int len;
+ CCW1 ccw;
+
+ if (!ccw_addr) {
+ return -EIO;
+ }
+
+ ccw = copy_ccw_from_guest(ccw_addr);
+
+ /* Check for invalid command codes. */
+ if ((ccw.cmd_code & 0x0f) == 0) {
+ return -EINVAL;
+ }
+ if (((ccw.cmd_code & 0x0f) == CCW_CMD_TIC) &&
+ ((ccw.cmd_code & 0xf0) != 0)) {
+ return -EINVAL;
+ }
+
+ if (ccw.flags & CCW_FLAG_SUSPEND) {
+ return -EINPROGRESS;
+ }
+
+ check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
+
+ /* Look at the command. */
+ switch (ccw.cmd_code) {
+ case CCW_CMD_NOOP:
+ /* Nothing to do. */
+ ret = 0;
+ break;
+ case CCW_CMD_BASIC_SENSE:
+ if (check_len) {
+ if (ccw.count != sizeof(sch->sense_data)) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ len = MIN(ccw.count, sizeof(sch->sense_data));
+ cpu_physical_memory_write(ccw.cda, sch->sense_data, len);
+ sch->curr_status.scsw.count = ccw.count - len;
+ memset(sch->sense_data, 0, sizeof(sch->sense_data));
+ ret = 0;
+ break;
+ case CCW_CMD_SENSE_ID:
+ {
+ SenseId sense_id;
+
+ copy_sense_id_to_guest(&sense_id, &sch->id);
+ /* Sense ID information is device specific. */
+ if (check_len) {
+ if (ccw.count != sizeof(sense_id)) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ len = MIN(ccw.count, sizeof(sense_id));
+ /*
+ * Only indicate 0xff in the first sense byte if we actually
+ * have enough place to store at least bytes 0-3.
+ */
+ if (len >= 4) {
+ sense_id.reserved = 0xff;
+ } else {
+ sense_id.reserved = 0;
+ }
+ cpu_physical_memory_write(ccw.cda, &sense_id, len);
+ sch->curr_status.scsw.count = ccw.count - len;
+ ret = 0;
+ break;
+ }
+ case CCW_CMD_TIC:
+ if (sch->last_cmd_valid && (sch->last_cmd.cmd_code == CCW_CMD_TIC)) {
+ ret = -EINVAL;
+ break;
+ }
+ if (ccw.flags & (CCW_FLAG_CC | CCW_FLAG_DC)) {
+ ret = -EINVAL;
+ break;
+ }
+ sch->channel_prog = ccw.cda;
+ ret = -EAGAIN;
+ break;
+ default:
+ if (sch->ccw_cb) {
+ /* Handle device specific commands. */
+ ret = sch->ccw_cb(sch, ccw);
+ } else {
+ ret = -ENOSYS;
+ }
+ break;
+ }
+ sch->last_cmd = ccw;
+ sch->last_cmd_valid = true;
+ if (ret == 0) {
+ if (ccw.flags & CCW_FLAG_CC) {
+ sch->channel_prog += 8;
+ ret = -EAGAIN;
+ }
+ }
+
+ return ret;
+}
+
+static void sch_handle_start_func(SubchDev *sch)
+{
+
+ PMCW *p = &sch->curr_status.pmcw;
+ SCSW *s = &sch->curr_status.scsw;
+ ORB *orb = sch->orb;
+ int path;
+ int ret;
+
+ /* Path management: In our simple css, we always choose the only path. */
+ path = 0x80;
+
+ if (!(s->ctrl & SCSW_ACTL_SUSP)) {
+ /* Look at the orb and try to execute the channel program. */
+ p->intparm = orb->intparm;
+ if (!(orb->lpm & path)) {
+ /* Generate a deferred cc 3 condition. */
+ s->flags |= SCSW_FLAGS_MASK_CC;
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
+ return;
+ }
+ } else {
+ s->ctrl &= ~(SCSW_ACTL_SUSP | SCSW_ACTL_RESUME_PEND);
+ }
+ sch->last_cmd_valid = false;
+ do {
+ ret = css_interpret_ccw(sch, sch->channel_prog);
+ switch (ret) {
+ case -EAGAIN:
+ /* ccw chain, continue processing */
+ break;
+ case 0:
+ /* success */
+ s->ctrl &= ~SCSW_ACTL_START_PEND;
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+ SCSW_STCTL_STATUS_PEND;
+ s->dstat = SCSW_DSTAT_CHANNEL_END | SCSW_DSTAT_DEVICE_END;
+ break;
+ case -ENOSYS:
+ /* unsupported command, generate unit check (command reject) */
+ s->ctrl &= ~SCSW_ACTL_START_PEND;
+ s->dstat = SCSW_DSTAT_UNIT_CHECK;
+ /* Set sense bit 0 in ecw0. */
+ sch->sense_data[0] = 0x80;
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+ SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+ break;
+ case -EFAULT:
+ /* memory problem, generate channel data check */
+ s->ctrl &= ~SCSW_ACTL_START_PEND;
+ s->cstat = SCSW_CSTAT_DATA_CHECK;
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+ SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+ break;
+ case -EBUSY:
+ /* subchannel busy, generate deferred cc 1 */
+ s->flags &= ~SCSW_FLAGS_MASK_CC;
+ s->flags |= (1 << 8);
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+ break;
+ case -EINPROGRESS:
+ /* channel program has been suspended */
+ s->ctrl &= ~SCSW_ACTL_START_PEND;
+ s->ctrl |= SCSW_ACTL_SUSP;
+ break;
+ default:
+ /* error, generate channel program check */
+ s->ctrl &= ~SCSW_ACTL_START_PEND;
+ s->cstat = SCSW_CSTAT_PROG_CHECK;
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
+ SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
+ break;
+ }
+ } while (ret == -EAGAIN);
+
+}
+
+/*
+ * On real machines, this would run asynchronously to the main vcpus.
+ * We might want to make some parts of the ssch handling (interpreting
+ * read/writes) asynchronous later on if we start supporting more than
+ * our current very simple devices.
+ */
+static void do_subchannel_work(SubchDev *sch)
+{
+
+ SCSW *s = &sch->curr_status.scsw;
+
+ if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
+ sch_handle_clear_func(sch);
+ } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
+ sch_handle_halt_func(sch);
+ } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
+ sch_handle_start_func(sch);
+ } else {
+ /* Cannot happen. */
+ return;
+ }
+ css_inject_io_interrupt(sch);
+}
+
+static void copy_pmcw_to_guest(PMCW *dest, const PMCW *src)
+{
+ int i;
+
+ dest->intparm = cpu_to_be32(src->intparm);
+ dest->flags = cpu_to_be16(src->flags);
+ dest->devno = cpu_to_be16(src->devno);
+ dest->lpm = src->lpm;
+ dest->pnom = src->pnom;
+ dest->lpum = src->lpum;
+ dest->pim = src->pim;
+ dest->mbi = cpu_to_be16(src->mbi);
+ dest->pom = src->pom;
+ dest->pam = src->pam;
+ for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) {
+ dest->chpid[i] = src->chpid[i];
+ }
+ dest->chars = cpu_to_be32(src->chars);
+}
+
+static void copy_scsw_to_guest(SCSW *dest, const SCSW *src)
+{
+ dest->flags = cpu_to_be16(src->flags);
+ dest->ctrl = cpu_to_be16(src->ctrl);
+ dest->cpa = cpu_to_be32(src->cpa);
+ dest->dstat = src->dstat;
+ dest->cstat = src->cstat;
+ dest->count = cpu_to_be16(src->count);
+}
+
+static void copy_schib_to_guest(SCHIB *dest, const SCHIB *src)
+{
+ int i;
+
+ copy_pmcw_to_guest(&dest->pmcw, &src->pmcw);
+ copy_scsw_to_guest(&dest->scsw, &src->scsw);
+ dest->mba = cpu_to_be64(src->mba);
+ for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
+ dest->mda[i] = src->mda[i];
+ }
+}
+
+int css_do_stsch(SubchDev *sch, SCHIB *schib)
+{
+ /* Use current status. */
+ copy_schib_to_guest(schib, &sch->curr_status);
+ return 0;
+}
+
+static void copy_pmcw_from_guest(PMCW *dest, const PMCW *src)
+{
+ int i;
+
+ dest->intparm = be32_to_cpu(src->intparm);
+ dest->flags = be16_to_cpu(src->flags);
+ dest->devno = be16_to_cpu(src->devno);
+ dest->lpm = src->lpm;
+ dest->pnom = src->pnom;
+ dest->lpum = src->lpum;
+ dest->pim = src->pim;
+ dest->mbi = be16_to_cpu(src->mbi);
+ dest->pom = src->pom;
+ dest->pam = src->pam;
+ for (i = 0; i < ARRAY_SIZE(dest->chpid); i++) {
+ dest->chpid[i] = src->chpid[i];
+ }
+ dest->chars = be32_to_cpu(src->chars);
+}
+
+static void copy_scsw_from_guest(SCSW *dest, const SCSW *src)
+{
+ dest->flags = be16_to_cpu(src->flags);
+ dest->ctrl = be16_to_cpu(src->ctrl);
+ dest->cpa = be32_to_cpu(src->cpa);
+ dest->dstat = src->dstat;
+ dest->cstat = src->cstat;
+ dest->count = be16_to_cpu(src->count);
+}
+
+static void copy_schib_from_guest(SCHIB *dest, const SCHIB *src)
+{
+ int i;
+
+ copy_pmcw_from_guest(&dest->pmcw, &src->pmcw);
+ copy_scsw_from_guest(&dest->scsw, &src->scsw);
+ dest->mba = be64_to_cpu(src->mba);
+ for (i = 0; i < ARRAY_SIZE(dest->mda); i++) {
+ dest->mda[i] = src->mda[i];
+ }
+}
+
+int css_do_msch(SubchDev *sch, SCHIB *orig_schib)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+ SCHIB schib;
+
+ if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_DNV)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
+ if (s->ctrl &
+ (SCSW_FCTL_START_FUNC|SCSW_FCTL_HALT_FUNC|SCSW_FCTL_CLEAR_FUNC)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ copy_schib_from_guest(&schib, orig_schib);
+ /* Only update the program-modifiable fields. */
+ p->intparm = schib.pmcw.intparm;
+ p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+ PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+ PMCW_FLAGS_MASK_MP);
+ p->flags |= schib.pmcw.flags &
+ (PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+ PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+ PMCW_FLAGS_MASK_MP);
+ p->lpm = schib.pmcw.lpm;
+ p->mbi = schib.pmcw.mbi;
+ p->pom = schib.pmcw.pom;
+ p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
+ p->chars |= schib.pmcw.chars &
+ (PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_CSENSE);
+ sch->curr_status.mba = schib.mba;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int css_do_xsch(SubchDev *sch)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (!(s->ctrl & SCSW_CTRL_MASK_FCTL) ||
+ ((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) ||
+ (!(s->ctrl &
+ (SCSW_ACTL_RESUME_PEND | SCSW_ACTL_START_PEND | SCSW_ACTL_SUSP))) ||
+ (s->ctrl & SCSW_ACTL_SUBCH_ACTIVE)) {
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
+ if (s->ctrl & SCSW_CTRL_MASK_STCTL) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* Cancel the current operation. */
+ s->ctrl &= ~(SCSW_FCTL_START_FUNC |
+ SCSW_ACTL_RESUME_PEND |
+ SCSW_ACTL_START_PEND |
+ SCSW_ACTL_SUSP);
+ sch->channel_prog = 0x0;
+ sch->last_cmd_valid = false;
+ sch->orb = NULL;
+ s->dstat = 0;
+ s->cstat = 0;
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int css_do_csch(SubchDev *sch)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* Trigger the clear function. */
+ s->ctrl &= ~(SCSW_CTRL_MASK_FCTL | SCSW_CTRL_MASK_ACTL);
+ s->ctrl |= SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_CLEAR_FUNC;
+
+ do_subchannel_work(sch);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int css_do_hsch(SubchDev *sch)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_STATUS_PEND) ||
+ (s->ctrl & (SCSW_STCTL_PRIMARY |
+ SCSW_STCTL_SECONDARY |
+ SCSW_STCTL_ALERT))) {
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
+ if (s->ctrl & (SCSW_FCTL_HALT_FUNC | SCSW_FCTL_CLEAR_FUNC)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* Trigger the halt function. */
+ s->ctrl |= SCSW_FCTL_HALT_FUNC;
+ s->ctrl &= ~SCSW_FCTL_START_FUNC;
+ if (((s->ctrl & SCSW_CTRL_MASK_ACTL) ==
+ (SCSW_ACTL_SUBCH_ACTIVE | SCSW_ACTL_DEVICE_ACTIVE)) &&
+ ((s->ctrl & SCSW_CTRL_MASK_STCTL) == SCSW_STCTL_INTERMEDIATE)) {
+ s->ctrl &= ~SCSW_STCTL_STATUS_PEND;
+ }
+ s->ctrl |= SCSW_ACTL_HALT_PEND;
+
+ do_subchannel_work(sch);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void css_update_chnmon(SubchDev *sch)
+{
+ if (!(sch->curr_status.pmcw.flags & PMCW_FLAGS_MASK_MME)) {
+ /* Not active. */
+ return;
+ }
+ /* The counter is conveniently located at the beginning of the struct. */
+ if (sch->curr_status.pmcw.chars & PMCW_CHARS_MASK_MBFC) {
+ /* Format 1, per-subchannel area. */
+ uint32_t count;
+
+ count = ldl_phys(sch->curr_status.mba);
+ count++;
+ stl_phys(sch->curr_status.mba, count);
+ } else {
+ /* Format 0, global area. */
+ uint32_t offset;
+ uint16_t count;
+
+ offset = sch->curr_status.pmcw.mbi << 5;
+ count = lduw_phys(channel_subsys->chnmon_area + offset);
+ count++;
+ stw_phys(channel_subsys->chnmon_area + offset, count);
+ }
+}
+
+int css_do_ssch(SubchDev *sch, ORB *orb)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
+ if (s->ctrl & (SCSW_FCTL_START_FUNC |
+ SCSW_FCTL_HALT_FUNC |
+ SCSW_FCTL_CLEAR_FUNC)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ /* If monitoring is active, update counter. */
+ if (channel_subsys->chnmon_active) {
+ css_update_chnmon(sch);
+ }
+ sch->orb = orb;
+ sch->channel_prog = orb->cpa;
+ /* Trigger the start function. */
+ s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND);
+ s->flags &= ~SCSW_FLAGS_MASK_PNO;
+
+ do_subchannel_work(sch);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void copy_irb_to_guest(IRB *dest, const IRB *src)
+{
+ int i;
+
+ copy_scsw_to_guest(&dest->scsw, &src->scsw);
+
+ for (i = 0; i < ARRAY_SIZE(dest->esw); i++) {
+ dest->esw[i] = cpu_to_be32(src->esw[i]);
+ }
+ for (i = 0; i < ARRAY_SIZE(dest->ecw); i++) {
+ dest->ecw[i] = cpu_to_be32(src->ecw[i]);
+ }
+ for (i = 0; i < ARRAY_SIZE(dest->emw); i++) {
+ dest->emw[i] = cpu_to_be32(src->emw[i]);
+ }
+}
+
+int css_do_tsch(SubchDev *sch, IRB *target_irb)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ uint16_t stctl;
+ uint16_t fctl;
+ uint16_t actl;
+ IRB irb;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = 3;
+ goto out;
+ }
+
+ stctl = s->ctrl & SCSW_CTRL_MASK_STCTL;
+ fctl = s->ctrl & SCSW_CTRL_MASK_FCTL;
+ actl = s->ctrl & SCSW_CTRL_MASK_ACTL;
+
+ /* Prepare the irb for the guest. */
+ memset(&irb, 0, sizeof(IRB));
+
+ /* Copy scsw from current status. */
+ memcpy(&irb.scsw, s, sizeof(SCSW));
+ if (stctl & SCSW_STCTL_STATUS_PEND) {
+ if (s->cstat & (SCSW_CSTAT_DATA_CHECK |
+ SCSW_CSTAT_CHN_CTRL_CHK |
+ SCSW_CSTAT_INTF_CTRL_CHK)) {
+ irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF;
+ irb.esw[0] = 0x04804000;
+ } else {
+ irb.esw[0] = 0x00800000;
+ }
+ /* If a unit check is pending, copy sense data. */
+ if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
+ (p->chars & PMCW_CHARS_MASK_CSENSE)) {
+ irb.scsw.flags |= SCSW_FLAGS_MASK_ESWF | SCSW_FLAGS_MASK_ECTL;
+ memcpy(irb.ecw, sch->sense_data, sizeof(sch->sense_data));
+ irb.esw[1] = 0x02000000 | (sizeof(sch->sense_data) << 8);
+ }
+ }
+ /* Store the irb to the guest. */
+ copy_irb_to_guest(target_irb, &irb);
+
+ /* Clear conditions on subchannel, if applicable. */
+ if (stctl & SCSW_STCTL_STATUS_PEND) {
+ s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
+ if ((stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) ||
+ ((fctl & SCSW_FCTL_HALT_FUNC) &&
+ (actl & SCSW_ACTL_SUSP))) {
+ s->ctrl &= ~SCSW_CTRL_MASK_FCTL;
+ }
+ if (stctl != (SCSW_STCTL_INTERMEDIATE | SCSW_STCTL_STATUS_PEND)) {
+ s->flags &= ~SCSW_FLAGS_MASK_PNO;
+ s->ctrl &= ~(SCSW_ACTL_RESUME_PEND |
+ SCSW_ACTL_START_PEND |
+ SCSW_ACTL_HALT_PEND |
+ SCSW_ACTL_CLEAR_PEND |
+ SCSW_ACTL_SUSP);
+ } else {
+ if ((actl & SCSW_ACTL_SUSP) &&
+ (fctl & SCSW_FCTL_START_FUNC)) {
+ s->flags &= ~SCSW_FLAGS_MASK_PNO;
+ if (fctl & SCSW_FCTL_HALT_FUNC) {
+ s->ctrl &= ~(SCSW_ACTL_RESUME_PEND |
+ SCSW_ACTL_START_PEND |
+ SCSW_ACTL_HALT_PEND |
+ SCSW_ACTL_CLEAR_PEND |
+ SCSW_ACTL_SUSP);
+ } else {
+ s->ctrl &= ~SCSW_ACTL_RESUME_PEND;
+ }
+ }
+ }
+ /* Clear pending sense data. */
+ if (p->chars & PMCW_CHARS_MASK_CSENSE) {
+ memset(sch->sense_data, 0 , sizeof(sch->sense_data));
+ }
+ }
+
+ ret = ((stctl & SCSW_STCTL_STATUS_PEND) == 0);
+
+out:
+ return ret;
+}
+
+static void copy_crw_to_guest(CRW *dest, const CRW *src)
+{
+ dest->flags = cpu_to_be16(src->flags);
+ dest->rsid = cpu_to_be16(src->rsid);
+}
+
+int css_do_stcrw(CRW *crw)
+{
+ CrwContainer *crw_cont;
+ int ret;
+
+ crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws);
+ if (crw_cont) {
+ QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling);
+ copy_crw_to_guest(crw, &crw_cont->crw);
+ g_free(crw_cont);
+ ret = 0;
+ } else {
+ /* List was empty, turn crw machine checks on again. */
+ memset(crw, 0, sizeof(*crw));
+ channel_subsys->do_crw_mchk = true;
+ ret = 1;
+ }
+
+ return ret;
+}
+
+int css_do_tpi(IOIntCode *int_code, int lowcore)
+{
+ /* No pending interrupts for !KVM. */
+ return 0;
+ }
+
+int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
+ int rfmt, void *buf)
+{
+ int i, desc_size;
+ uint32_t words[8];
+ uint32_t chpid_type_word;
+ CssImage *css;
+
+ if (!m && !cssid) {
+ css = channel_subsys->css[channel_subsys->default_cssid];
+ } else {
+ css = channel_subsys->css[cssid];
+ }
+ if (!css) {
+ return 0;
+ }
+ desc_size = 0;
+ for (i = f_chpid; i <= l_chpid; i++) {
+ if (css->chpids[i].in_use) {
+ chpid_type_word = 0x80000000 | (css->chpids[i].type << 8) | i;
+ if (rfmt == 0) {
+ words[0] = cpu_to_be32(chpid_type_word);
+ words[1] = 0;
+ memcpy(buf + desc_size, words, 8);
+ desc_size += 8;
+ } else if (rfmt == 1) {
+ words[0] = cpu_to_be32(chpid_type_word);
+ words[1] = 0;
+ words[2] = 0;
+ words[3] = 0;
+ words[4] = 0;
+ words[5] = 0;
+ words[6] = 0;
+ words[7] = 0;
+ memcpy(buf + desc_size, words, 32);
+ desc_size += 32;
+ }
+ }
+ }
+ return desc_size;
+}
+
+void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
+{
+ /* dct is currently ignored (not really meaningful for our devices) */
+ /* TODO: Don't ignore mbk. */
+ if (update && !channel_subsys->chnmon_active) {
+ /* Enable measuring. */
+ channel_subsys->chnmon_area = mbo;
+ channel_subsys->chnmon_active = true;
+ }
+ if (!update && channel_subsys->chnmon_active) {
+ /* Disable measuring. */
+ channel_subsys->chnmon_area = 0;
+ channel_subsys->chnmon_active = false;
+ }
+}
+
+int css_do_rsch(SubchDev *sch)
+{
+ SCSW *s = &sch->curr_status.scsw;
+ PMCW *p = &sch->curr_status.pmcw;
+ int ret;
+
+ if (!(p->flags & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA))) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
+ ret = -EINPROGRESS;
+ goto out;
+ }
+
+ if (((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) ||
+ (s->ctrl & SCSW_ACTL_RESUME_PEND) ||
+ (!(s->ctrl & SCSW_ACTL_SUSP))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* If monitoring is active, update counter. */
+ if (channel_subsys->chnmon_active) {
+ css_update_chnmon(sch);
+ }
+
+ s->ctrl |= SCSW_ACTL_RESUME_PEND;
+ do_subchannel_work(sch);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int css_do_rchp(uint8_t cssid, uint8_t chpid)
+{
+ uint8_t real_cssid;
+
+ if (cssid > channel_subsys->max_cssid) {
+ return -EINVAL;
+ }
+ if (channel_subsys->max_cssid == 0) {
+ real_cssid = channel_subsys->default_cssid;
+ } else {
+ real_cssid = cssid;
+ }
+ if (!channel_subsys->css[real_cssid]) {
+ return -EINVAL;
+ }
+
+ if (!channel_subsys->css[real_cssid]->chpids[chpid].in_use) {
+ return -ENODEV;
+ }
+
+ if (!channel_subsys->css[real_cssid]->chpids[chpid].is_virtual) {
+ fprintf(stderr,
+ "rchp unsupported for non-virtual chpid %x.%02x!\n",
+ real_cssid, chpid);
+ return -ENODEV;
+ }
+
+ /* We don't really use a channel path, so we're done here. */
+ css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT,
+ channel_subsys->max_cssid > 0 ? 1 : 0, chpid);
+ if (channel_subsys->max_cssid > 0) {
+ css_queue_crw(CRW_RSC_CHP, CRW_ERC_INIT, 0, real_cssid << 8);
+ }
+ return 0;
+}
+
+bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid)
+{
+ SubchSet *set;
+
+ if (cssid > MAX_CSSID || ssid > MAX_SSID || !channel_subsys->css[cssid] ||
+ !channel_subsys->css[cssid]->sch_set[ssid]) {
+ return true;
+ }
+ set = channel_subsys->css[cssid]->sch_set[ssid];
+ return schid > find_last_bit(set->schids_used,
+ (MAX_SCHID + 1) / sizeof(unsigned long));
+}
+
+static int css_add_virtual_chpid(uint8_t cssid, uint8_t chpid, uint8_t type)
+{
+ CssImage *css;
+
+ trace_css_chpid_add(cssid, chpid, type);
+ if (cssid > MAX_CSSID) {
+ return -EINVAL;
+ }
+ css = channel_subsys->css[cssid];
+ if (!css) {
+ return -EINVAL;
+ }
+ if (css->chpids[chpid].in_use) {
+ return -EEXIST;
+ }
+ css->chpids[chpid].in_use = 1;
+ css->chpids[chpid].type = type;
+ css->chpids[chpid].is_virtual = 1;
+
+ css_generate_chp_crws(cssid, chpid);
+
+ return 0;
+}
+
+void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type)
+{
+ PMCW *p = &sch->curr_status.pmcw;
+ SCSW *s = &sch->curr_status.scsw;
+ int i;
+ CssImage *css = channel_subsys->css[sch->cssid];
+
+ assert(css != NULL);
+ memset(p, 0, sizeof(PMCW));
+ p->flags |= PMCW_FLAGS_MASK_DNV;
+ p->devno = sch->devno;
+ /* single path */
+ p->pim = 0x80;
+ p->pom = 0xff;
+ p->pam = 0x80;
+ p->chpid[0] = chpid;
+ if (!css->chpids[chpid].in_use) {
+ css_add_virtual_chpid(sch->cssid, chpid, type);
+ }
+
+ memset(s, 0, sizeof(SCSW));
+ sch->curr_status.mba = 0;
+ for (i = 0; i < ARRAY_SIZE(sch->curr_status.mda); i++) {
+ sch->curr_status.mda[i] = 0;
+ }
+}
+
+SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid, uint16_t schid)
+{
+ uint8_t real_cssid;
+
+ real_cssid = (!m && (cssid == 0)) ? channel_subsys->default_cssid : cssid;
+
+ if (!channel_subsys->css[real_cssid]) {
+ return NULL;
+ }
+
+ if (!channel_subsys->css[real_cssid]->sch_set[ssid]) {
+ return NULL;
+ }
+
+ return channel_subsys->css[real_cssid]->sch_set[ssid]->sch[schid];
+}
+
+bool css_subch_visible(SubchDev *sch)
+{
+ if (sch->ssid > channel_subsys->max_ssid) {
+ return false;
+ }
+
+ if (sch->cssid != channel_subsys->default_cssid) {
+ return (channel_subsys->max_cssid > 0);
+ }
+
+ return true;
+}
+
+bool css_present(uint8_t cssid)
+{
+ return (channel_subsys->css[cssid] != NULL);
+}
+
+bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno)
+{
+ if (!channel_subsys->css[cssid]) {
+ return false;
+ }
+ if (!channel_subsys->css[cssid]->sch_set[ssid]) {
+ return false;
+ }
+
+ return !!test_bit(devno,
+ channel_subsys->css[cssid]->sch_set[ssid]->devnos_used);
+}
+
+void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
+ uint16_t devno, SubchDev *sch)
+{
+ CssImage *css;
+ SubchSet *s_set;
+
+ trace_css_assign_subch(sch ? "assign" : "deassign", cssid, ssid, schid,
+ devno);
+ if (!channel_subsys->css[cssid]) {
+ fprintf(stderr,
+ "Suspicious call to %s (%x.%x.%04x) for non-existing css!\n",
+ __func__, cssid, ssid, schid);
+ return;
+ }
+ css = channel_subsys->css[cssid];
+
+ if (!css->sch_set[ssid]) {
+ css->sch_set[ssid] = g_malloc0(sizeof(SubchSet));
+ }
+ s_set = css->sch_set[ssid];
+
+ s_set->sch[schid] = sch;
+ if (sch) {
+ set_bit(schid, s_set->schids_used);
+ set_bit(devno, s_set->devnos_used);
+ } else {
+ clear_bit(schid, s_set->schids_used);
+ clear_bit(devno, s_set->devnos_used);
+ }
+}
+
+void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid)
+{
+ CrwContainer *crw_cont;
+
+ trace_css_crw(rsc, erc, rsid, chain ? "(chained)" : "");
+ /* TODO: Maybe use a static crw pool? */
+ crw_cont = g_try_malloc0(sizeof(CrwContainer));
+ if (!crw_cont) {
+ channel_subsys->crws_lost = true;
+ return;
+ }
+ crw_cont->crw.flags = (rsc << 8) | erc;
+ if (chain) {
+ crw_cont->crw.flags |= CRW_FLAGS_MASK_C;
+ }
+ crw_cont->crw.rsid = rsid;
+ if (channel_subsys->crws_lost) {
+ crw_cont->crw.flags |= CRW_FLAGS_MASK_R;
+ channel_subsys->crws_lost = false;
+ }
+
+ QTAILQ_INSERT_TAIL(&channel_subsys->pending_crws, crw_cont, sibling);
+
+ if (channel_subsys->do_crw_mchk) {
+ S390CPU *cpu = s390_cpu_addr2state(0);
+
+ channel_subsys->do_crw_mchk = false;
+ /* Inject crw pending machine check. */
+ s390_crw_mchk(cpu);
+ }
+}
+
+void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
+ int hotplugged, int add)
+{
+ uint8_t guest_cssid;
+ bool chain_crw;
+
+ if (add && !hotplugged) {
+ return;
+ }
+ if (channel_subsys->max_cssid == 0) {
+ /* Default cssid shows up as 0. */
+ guest_cssid = (cssid == channel_subsys->default_cssid) ? 0 : cssid;
+ } else {
+ /* Show real cssid to the guest. */
+ guest_cssid = cssid;
+ }
+ /*
+ * Only notify for higher subchannel sets/channel subsystems if the
+ * guest has enabled it.
+ */
+ if ((ssid > channel_subsys->max_ssid) ||
+ (guest_cssid > channel_subsys->max_cssid) ||
+ ((channel_subsys->max_cssid == 0) &&
+ (cssid != channel_subsys->default_cssid))) {
+ return;
+ }
+ chain_crw = (channel_subsys->max_ssid > 0) ||
+ (channel_subsys->max_cssid > 0);
+ css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, chain_crw ? 1 : 0, schid);
+ if (chain_crw) {
+ css_queue_crw(CRW_RSC_SUBCH, CRW_ERC_IPI, 0,
+ (guest_cssid << 8) | (ssid << 4));
+ }
+}
+
+void css_generate_chp_crws(uint8_t cssid, uint8_t chpid)
+{
+ /* TODO */
+}
+
+int css_enable_mcsse(void)
+{
+ trace_css_enable_facility("mcsse");
+ channel_subsys->max_cssid = MAX_CSSID;
+ return 0;
+}
+
+int css_enable_mss(void)
+{
+ trace_css_enable_facility("mss");
+ channel_subsys->max_ssid = MAX_SSID;
+ return 0;
+}
+
+static void css_init(void)
+{
+ channel_subsys = g_malloc0(sizeof(*channel_subsys));
+ QTAILQ_INIT(&channel_subsys->pending_crws);
+ channel_subsys->do_crw_mchk = true;
+ channel_subsys->crws_lost = false;
+ channel_subsys->chnmon_active = false;
+}
+machine_init(css_init);
+
+void css_reset_sch(SubchDev *sch)
+{
+ PMCW *p = &sch->curr_status.pmcw;
+
+ p->intparm = 0;
+ p->flags &= ~(PMCW_FLAGS_MASK_ISC | PMCW_FLAGS_MASK_ENA |
+ PMCW_FLAGS_MASK_LM | PMCW_FLAGS_MASK_MME |
+ PMCW_FLAGS_MASK_MP | PMCW_FLAGS_MASK_TF);
+ p->flags |= PMCW_FLAGS_MASK_DNV;
+ p->devno = sch->devno;
+ p->pim = 0x80;
+ p->lpm = p->pim;
+ p->pnom = 0;
+ p->lpum = 0;
+ p->mbi = 0;
+ p->pom = 0xff;
+ p->pam = 0x80;
+ p->chars &= ~(PMCW_CHARS_MASK_MBFC | PMCW_CHARS_MASK_XMWME |
+ PMCW_CHARS_MASK_CSENSE);
+
+ memset(&sch->curr_status.scsw, 0, sizeof(sch->curr_status.scsw));
+ sch->curr_status.mba = 0;
+
+ sch->channel_prog = 0x0;
+ sch->last_cmd_valid = false;
+ sch->orb = NULL;
+}
+
+void css_reset(void)
+{
+ CrwContainer *crw_cont;
+
+ /* Clean up monitoring. */
+ channel_subsys->chnmon_active = false;
+ channel_subsys->chnmon_area = 0;
+
+ /* Clear pending CRWs. */
+ while ((crw_cont = QTAILQ_FIRST(&channel_subsys->pending_crws))) {
+ QTAILQ_REMOVE(&channel_subsys->pending_crws, crw_cont, sibling);
+ g_free(crw_cont);
+ }
+ channel_subsys->do_crw_mchk = true;
+ channel_subsys->crws_lost = false;
+
+ /* Reset maximum ids. */
+ channel_subsys->max_cssid = 0;
+ channel_subsys->max_ssid = 0;
+}
diff --git a/hw/s390x/css.h b/hw/s390x/css.h
new file mode 100644
index 0000000000..85ed05d0f5
--- /dev/null
+++ b/hw/s390x/css.h
@@ -0,0 +1,99 @@
+/*
+ * Channel subsystem structures and definitions.
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef CSS_H
+#define CSS_H
+
+#include "ioinst.h"
+
+/* Channel subsystem constants. */
+#define MAX_SCHID 65535
+#define MAX_SSID 3
+#define MAX_CSSID 254 /* 255 is reserved */
+#define MAX_CHPID 255
+
+#define MAX_CIWS 62
+
+typedef struct CIW {
+ uint8_t type;
+ uint8_t command;
+ uint16_t count;
+} QEMU_PACKED CIW;
+
+typedef struct SenseId {
+ /* common part */
+ uint8_t reserved; /* always 0x'FF' */
+ uint16_t cu_type; /* control unit type */
+ uint8_t cu_model; /* control unit model */
+ uint16_t dev_type; /* device type */
+ uint8_t dev_model; /* device model */
+ uint8_t unused; /* padding byte */
+ /* extended part */
+ CIW ciw[MAX_CIWS]; /* variable # of CIWs */
+} QEMU_PACKED SenseId;
+
+/* Channel measurements, from linux/drivers/s390/cio/cmf.c. */
+typedef struct CMB {
+ uint16_t ssch_rsch_count;
+ uint16_t sample_count;
+ uint32_t device_connect_time;
+ uint32_t function_pending_time;
+ uint32_t device_disconnect_time;
+ uint32_t control_unit_queuing_time;
+ uint32_t device_active_only_time;
+ uint32_t reserved[2];
+} QEMU_PACKED CMB;
+
+typedef struct CMBE {
+ uint32_t ssch_rsch_count;
+ uint32_t sample_count;
+ uint32_t device_connect_time;
+ uint32_t function_pending_time;
+ uint32_t device_disconnect_time;
+ uint32_t control_unit_queuing_time;
+ uint32_t device_active_only_time;
+ uint32_t device_busy_time;
+ uint32_t initial_command_response_time;
+ uint32_t reserved[7];
+} QEMU_PACKED CMBE;
+
+struct SubchDev {
+ /* channel-subsystem related things: */
+ uint8_t cssid;
+ uint8_t ssid;
+ uint16_t schid;
+ uint16_t devno;
+ SCHIB curr_status;
+ uint8_t sense_data[32];
+ hwaddr channel_prog;
+ CCW1 last_cmd;
+ bool last_cmd_valid;
+ ORB *orb;
+ /* transport-provided data: */
+ int (*ccw_cb) (SubchDev *, CCW1);
+ SenseId id;
+ void *driver_data;
+};
+
+typedef SubchDev *(*css_subch_cb_func)(uint8_t m, uint8_t cssid, uint8_t ssid,
+ uint16_t schid);
+int css_create_css_image(uint8_t cssid, bool default_image);
+bool css_devno_used(uint8_t cssid, uint8_t ssid, uint16_t devno);
+void css_subch_assign(uint8_t cssid, uint8_t ssid, uint16_t schid,
+ uint16_t devno, SubchDev *sch);
+void css_sch_build_virtual_schib(SubchDev *sch, uint8_t chpid, uint8_t type);
+void css_reset(void);
+void css_reset_sch(SubchDev *sch);
+void css_queue_crw(uint8_t rsc, uint8_t erc, int chain, uint16_t rsid);
+void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
+ int hotplugged, int add);
+void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
+#endif
diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 7cbbf99fde..86e84153ad 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -159,7 +159,7 @@ static void s390_ipl_class_init(ObjectClass *klass, void *data)
dc->no_user = 1;
}
-static TypeInfo s390_ipl_info = {
+static const TypeInfo s390_ipl_info = {
.class_init = s390_ipl_class_init,
.parent = TYPE_SYS_BUS_DEVICE,
.name = "s390-ipl",
diff --git a/hw/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c
index b5d1f2be16..32f63b07ea 100644
--- a/hw/s390-virtio-bus.c
+++ b/hw/s390x/s390-virtio-bus.c
@@ -17,12 +17,12 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "hw.h"
+#include "hw/hw.h"
#include "block/block.h"
#include "sysemu/sysemu.h"
-#include "boards.h"
+#include "hw/boards.h"
#include "monitor/monitor.h"
-#include "loader.h"
+#include "hw/loader.h"
#include "elf.h"
#include "hw/virtio.h"
#include "hw/virtio-rng.h"
@@ -31,7 +31,7 @@
#include "hw/sysbus.h"
#include "sysemu/kvm.h"
-#include "hw/s390-virtio-bus.h"
+#include "hw/s390x/s390-virtio-bus.h"
#include "hw/virtio-bus.h"
/* #define DEBUG_S390 */
@@ -508,6 +508,13 @@ static int s390_virtio_busdev_init(DeviceState *dev)
return _info->init(_dev);
}
+static void s390_virtio_busdev_reset(DeviceState *dev)
+{
+ VirtIOS390Device *_dev = (VirtIOS390Device *)dev;
+
+ virtio_reset(_dev->vdev);
+}
+
static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@@ -515,6 +522,7 @@ static void virtio_s390_device_class_init(ObjectClass *klass, void *data)
dc->init = s390_virtio_busdev_init;
dc->bus_type = TYPE_S390_VIRTIO_BUS;
dc->unplug = qdev_simple_unplug_cb;
+ dc->reset = s390_virtio_busdev_reset;
}
static const TypeInfo virtio_s390_device_info = {
diff --git a/hw/s390-virtio-bus.h b/hw/s390x/s390-virtio-bus.h
index 438b37fd82..4aacf83998 100644
--- a/hw/s390-virtio-bus.h
+++ b/hw/s390x/s390-virtio-bus.h
@@ -19,12 +19,12 @@
#ifndef HW_S390_VIRTIO_BUS_H
#define HW_S390_VIRTIO_BUS_H 1
-#include "virtio-blk.h"
-#include "virtio-net.h"
-#include "virtio-rng.h"
-#include "virtio-serial.h"
-#include "virtio-scsi.h"
-#include "virtio-bus.h"
+#include "hw/virtio-blk.h"
+#include "hw/virtio-net.h"
+#include "hw/virtio-rng.h"
+#include "hw/virtio-serial.h"
+#include "hw/virtio-scsi.h"
+#include "hw/virtio-bus.h"
#define VIRTIO_DEV_OFFS_TYPE 0 /* 8 bits */
#define VIRTIO_DEV_OFFS_NUM_VQ 1 /* 8 bits */
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
new file mode 100644
index 0000000000..6549211820
--- /dev/null
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -0,0 +1,134 @@
+/*
+ * virtio ccw machine
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "hw/boards.h"
+#include "exec/address-spaces.h"
+#include "s390-virtio.h"
+#include "sclp.h"
+#include "ioinst.h"
+#include "css.h"
+#include "virtio-ccw.h"
+
+static int virtio_ccw_hcall_notify(const uint64_t *args)
+{
+ uint64_t subch_id = args[0];
+ uint64_t queue = args[1];
+ SubchDev *sch;
+ int cssid, ssid, schid, m;
+
+ if (ioinst_disassemble_sch_ident(subch_id, &m, &cssid, &ssid, &schid)) {
+ return -EINVAL;
+ }
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (!sch || !css_subch_visible(sch)) {
+ return -EINVAL;
+ }
+ virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
+ return 0;
+
+}
+
+static int virtio_ccw_hcall_early_printk(const uint64_t *args)
+{
+ uint64_t mem = args[0];
+
+ if (mem < ram_size) {
+ /* Early printk */
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static void virtio_ccw_register_hcalls(void)
+{
+ s390_register_virtio_hypercall(KVM_S390_VIRTIO_CCW_NOTIFY,
+ virtio_ccw_hcall_notify);
+ /* Tolerate early printk. */
+ s390_register_virtio_hypercall(KVM_S390_VIRTIO_NOTIFY,
+ virtio_ccw_hcall_early_printk);
+}
+
+static void ccw_init(QEMUMachineInitArgs *args)
+{
+ ram_addr_t my_ram_size = args->ram_size;
+ MemoryRegion *sysmem = get_system_memory();
+ MemoryRegion *ram = g_new(MemoryRegion, 1);
+ int shift = 0;
+ uint8_t *storage_keys;
+ int ret;
+ VirtualCssBus *css_bus;
+
+ /* s390x ram size detection needs a 16bit multiplier + an increment. So
+ guests > 64GB can be specified in 2MB steps etc. */
+ while ((my_ram_size >> (20 + shift)) > 65535) {
+ shift++;
+ }
+ my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
+
+ /* lets propagate the changed ram size into the global variable. */
+ ram_size = my_ram_size;
+
+ /* get a BUS */
+ css_bus = virtual_css_bus_init();
+ s390_sclp_init();
+ s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
+ args->initrd_filename);
+
+ /* register hypercalls */
+ virtio_ccw_register_hcalls();
+
+ /* allocate RAM */
+ memory_region_init_ram(ram, "s390.ram", my_ram_size);
+ vmstate_register_ram_global(ram);
+ memory_region_add_subregion(sysmem, 0, ram);
+
+ /* allocate storage keys */
+ storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
+
+ /* init CPUs */
+ s390_init_cpus(args->cpu_model, storage_keys);
+
+ if (kvm_enabled()) {
+ kvm_s390_enable_css_support(s390_cpu_addr2state(0));
+ }
+ /*
+ * Create virtual css and set it as default so that non mcss-e
+ * enabled guests only see virtio devices.
+ */
+ ret = css_create_css_image(VIRTUAL_CSSID, true);
+ assert(ret == 0);
+
+ /* Create VirtIO network adapters */
+ s390_create_virtio_net(BUS(css_bus), "virtio-net-ccw");
+}
+
+static QEMUMachine ccw_machine = {
+ .name = "s390-ccw-virtio",
+ .alias = "s390-ccw",
+ .desc = "VirtIO-ccw based S390 machine",
+ .init = ccw_init,
+ .block_default_type = IF_VIRTIO,
+ .no_cdrom = 1,
+ .no_floppy = 1,
+ .no_serial = 1,
+ .no_parallel = 1,
+ .no_sdcard = 1,
+ .use_sclp = 1,
+ .max_cpus = 255,
+ DEFAULT_MACHINE_OPTIONS,
+};
+
+static void ccw_machine_init(void)
+{
+ qemu_register_machine(&ccw_machine);
+}
+
+machine_init(ccw_machine_init)
diff --git a/hw/s390x/s390-virtio-hcall.c b/hw/s390x/s390-virtio-hcall.c
index d7938c0734..ee626493c6 100644
--- a/hw/s390x/s390-virtio-hcall.c
+++ b/hw/s390x/s390-virtio-hcall.c
@@ -10,7 +10,7 @@
*/
#include "cpu.h"
-#include "hw/s390-virtio.h"
+#include "hw/s390x/s390-virtio.h"
#define MAX_DIAG_SUBCODES 255
diff --git a/hw/s390-virtio.c b/hw/s390x/s390-virtio.c
index 5edaabb7c4..2a1d9ac2da 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -21,22 +21,22 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "hw.h"
+#include "hw/hw.h"
#include "block/block.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "net/net.h"
-#include "boards.h"
+#include "hw/boards.h"
#include "monitor/monitor.h"
-#include "loader.h"
+#include "hw/loader.h"
#include "hw/virtio.h"
#include "hw/sysbus.h"
#include "sysemu/kvm.h"
#include "exec/address-spaces.h"
-#include "hw/s390-virtio-bus.h"
+#include "hw/s390x/s390-virtio-bus.h"
#include "hw/s390x/sclp.h"
-#include "hw/s390-virtio.h"
+#include "hw/s390x/s390-virtio.h"
//#define DEBUG_S390
@@ -86,6 +86,9 @@ static int s390_virtio_hcall_reset(const uint64_t *args)
VirtIOS390Device *dev;
dev = s390_virtio_bus_find_mem(s390_bus, mem);
+ if (dev == NULL) {
+ return -EINVAL;
+ }
virtio_reset(dev->vdev);
stb_phys(dev->dev_offs + VIRTIO_DEV_OFFS_STATUS, 0);
s390_virtio_device_sync(dev);
@@ -147,13 +150,73 @@ unsigned s390_del_running_cpu(CPUS390XState *env)
return s390_running_cpus;
}
+void s390_init_ipl_dev(const char *kernel_filename,
+ const char *kernel_cmdline,
+ const char *initrd_filename)
+{
+ DeviceState *dev;
+
+ dev = qdev_create(NULL, "s390-ipl");
+ if (kernel_filename) {
+ qdev_prop_set_string(dev, "kernel", kernel_filename);
+ }
+ if (initrd_filename) {
+ qdev_prop_set_string(dev, "initrd", initrd_filename);
+ }
+ qdev_prop_set_string(dev, "cmdline", kernel_cmdline);
+ qdev_init_nofail(dev);
+}
+
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys)
+{
+ int i;
+
+ if (cpu_model == NULL) {
+ cpu_model = "host";
+ }
+
+ ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
+
+ for (i = 0; i < smp_cpus; i++) {
+ S390CPU *cpu;
+
+ cpu = cpu_s390x_init(cpu_model);
+
+ ipi_states[i] = cpu;
+ cpu->env.halted = 1;
+ cpu->env.exception_index = EXCP_HLT;
+ cpu->env.storage_keys = storage_keys;
+ }
+}
+
+
+void s390_create_virtio_net(BusState *bus, const char *name)
+{
+ int i;
+
+ for (i = 0; i < nb_nics; i++) {
+ NICInfo *nd = &nd_table[i];
+ DeviceState *dev;
+
+ if (!nd->model) {
+ nd->model = g_strdup("virtio");
+ }
+
+ if (strcmp(nd->model, "virtio")) {
+ fprintf(stderr, "S390 only supports VirtIO nics\n");
+ exit(1);
+ }
+
+ dev = qdev_create(bus, name);
+ qdev_set_nic_properties(dev, nd);
+ qdev_init_nofail(dev);
+ }
+}
+
/* PC hardware initialisation */
static void s390_init(QEMUMachineInitArgs *args)
{
ram_addr_t my_ram_size = args->ram_size;
- const char *cpu_model = args->cpu_model;
- CPUS390XState *env = NULL;
- DeviceState *dev;
MemoryRegion *sysmem = get_system_memory();
MemoryRegion *ram = g_new(MemoryRegion, 1);
int shift = 0;
@@ -161,7 +224,6 @@ static void s390_init(QEMUMachineInitArgs *args)
void *virtio_region;
hwaddr virtio_region_len;
hwaddr virtio_region_start;
- int i;
/* s390x ram size detection needs a 16bit multiplier + an increment. So
guests > 64GB can be specified in 2MB steps etc. */
@@ -176,15 +238,8 @@ static void s390_init(QEMUMachineInitArgs *args)
/* get a BUS */
s390_bus = s390_virtio_bus_init(&my_ram_size);
s390_sclp_init();
- dev = qdev_create(NULL, "s390-ipl");
- if (args->kernel_filename) {
- qdev_prop_set_string(dev, "kernel", args->kernel_filename);
- }
- if (args->initrd_filename) {
- qdev_prop_set_string(dev, "initrd", args->initrd_filename);
- }
- qdev_prop_set_string(dev, "cmdline", args->kernel_cmdline);
- qdev_init_nofail(dev);
+ s390_init_ipl_dev(args->kernel_filename, args->kernel_cmdline,
+ args->initrd_filename);
/* register hypercalls */
s390_virtio_register_hcalls();
@@ -207,46 +262,10 @@ static void s390_init(QEMUMachineInitArgs *args)
storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
/* init CPUs */
- if (cpu_model == NULL) {
- cpu_model = "host";
- }
-
- ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
-
- for (i = 0; i < smp_cpus; i++) {
- S390CPU *cpu;
- CPUS390XState *tmp_env;
-
- cpu = cpu_s390x_init(cpu_model);
- tmp_env = &cpu->env;
- if (!env) {
- env = tmp_env;
- }
- ipi_states[i] = cpu;
- tmp_env->halted = 1;
- tmp_env->exception_index = EXCP_HLT;
- tmp_env->storage_keys = storage_keys;
- }
-
+ s390_init_cpus(args->cpu_model, storage_keys);
/* Create VirtIO network adapters */
- for(i = 0; i < nb_nics; i++) {
- NICInfo *nd = &nd_table[i];
- DeviceState *dev;
-
- if (!nd->model) {
- nd->model = g_strdup("virtio");
- }
-
- if (strcmp(nd->model, "virtio")) {
- fprintf(stderr, "S390 only supports VirtIO nics\n");
- exit(1);
- }
-
- dev = qdev_create((BusState *)s390_bus, "virtio-net-s390");
- qdev_set_nic_properties(dev, nd);
- qdev_init_nofail(dev);
- }
+ s390_create_virtio_net((BusState *)s390_bus, "virtio-net-s390");
}
static QEMUMachine s390_machine = {
diff --git a/hw/s390-virtio.h b/hw/s390x/s390-virtio.h
index 25bb610fd8..a6c4c19895 100644
--- a/hw/s390-virtio.h
+++ b/hw/s390x/s390-virtio.h
@@ -15,8 +15,14 @@
#define KVM_S390_VIRTIO_NOTIFY 0
#define KVM_S390_VIRTIO_RESET 1
#define KVM_S390_VIRTIO_SET_STATUS 2
+#define KVM_S390_VIRTIO_CCW_NOTIFY 3
typedef int (*s390_virtio_fn)(const uint64_t *args);
void s390_register_virtio_hypercall(uint64_t code, s390_virtio_fn fn);
+void s390_init_cpus(const char *cpu_model, uint8_t *storage_keys);
+void s390_init_ipl_dev(const char *kernel_filename,
+ const char *kernel_cmdline,
+ const char *initrd_filename);
+void s390_create_virtio_net(BusState *bus, const char *name);
#endif
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
new file mode 100644
index 0000000000..231f81e48c
--- /dev/null
+++ b/hw/s390x/virtio-ccw.c
@@ -0,0 +1,960 @@
+/*
+ * virtio ccw target implementation
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "hw/hw.h"
+#include "block/block.h"
+#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
+#include "net/net.h"
+#include "monitor/monitor.h"
+#include "hw/virtio.h"
+#include "hw/virtio-serial.h"
+#include "hw/virtio-net.h"
+#include "hw/sysbus.h"
+#include "qemu/bitops.h"
+#include "hw/virtio-bus.h"
+
+#include "ioinst.h"
+#include "css.h"
+#include "virtio-ccw.h"
+#include "trace.h"
+
+static int virtual_css_bus_reset(BusState *qbus)
+{
+ /* This should actually be modelled via the generic css */
+ css_reset();
+
+ /* we dont traverse ourself, return 0 */
+ return 0;
+}
+
+
+static void virtual_css_bus_class_init(ObjectClass *klass, void *data)
+{
+ BusClass *k = BUS_CLASS(klass);
+
+ k->reset = virtual_css_bus_reset;
+}
+
+static const TypeInfo virtual_css_bus_info = {
+ .name = TYPE_VIRTUAL_CSS_BUS,
+ .parent = TYPE_BUS,
+ .instance_size = sizeof(VirtualCssBus),
+ .class_init = virtual_css_bus_class_init,
+};
+
+static const VirtIOBindings virtio_ccw_bindings;
+
+VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch)
+{
+ VirtIODevice *vdev = NULL;
+
+ if (sch->driver_data) {
+ vdev = ((VirtioCcwDevice *)sch->driver_data)->vdev;
+ }
+ return vdev;
+}
+
+VirtualCssBus *virtual_css_bus_init(void)
+{
+ VirtualCssBus *cbus;
+ BusState *bus;
+ DeviceState *dev;
+
+ /* Create bridge device */
+ dev = qdev_create(NULL, "virtual-css-bridge");
+ qdev_init_nofail(dev);
+
+ /* Create bus on bridge device */
+ bus = qbus_create(TYPE_VIRTUAL_CSS_BUS, dev, "virtual-css");
+ cbus = VIRTUAL_CSS_BUS(bus);
+
+ /* Enable hotplugging */
+ bus->allow_hotplug = 1;
+
+ return cbus;
+}
+
+/* Communication blocks used by several channel commands. */
+typedef struct VqInfoBlock {
+ uint64_t queue;
+ uint32_t align;
+ uint16_t index;
+ uint16_t num;
+} QEMU_PACKED VqInfoBlock;
+
+typedef struct VqConfigBlock {
+ uint16_t index;
+ uint16_t num_max;
+} QEMU_PACKED VqConfigBlock;
+
+typedef struct VirtioFeatDesc {
+ uint32_t features;
+ uint8_t index;
+} QEMU_PACKED VirtioFeatDesc;
+
+/* Specify where the virtqueues for the subchannel are in guest memory. */
+static int virtio_ccw_set_vqs(SubchDev *sch, uint64_t addr, uint32_t align,
+ uint16_t index, uint16_t num)
+{
+ VirtioCcwDevice *dev = sch->driver_data;
+
+ if (index > VIRTIO_PCI_QUEUE_MAX) {
+ return -EINVAL;
+ }
+
+ /* Current code in virtio.c relies on 4K alignment. */
+ if (addr && (align != 4096)) {
+ return -EINVAL;
+ }
+
+ if (!dev) {
+ return -EINVAL;
+ }
+
+ virtio_queue_set_addr(dev->vdev, index, addr);
+ if (!addr) {
+ virtio_queue_set_vector(dev->vdev, index, 0);
+ } else {
+ /* Fail if we don't have a big enough queue. */
+ /* TODO: Add interface to handle vring.num changing */
+ if (virtio_queue_get_num(dev->vdev, index) > num) {
+ return -EINVAL;
+ }
+ virtio_queue_set_vector(dev->vdev, index, index);
+ }
+ /* tell notify handler in case of config change */
+ dev->vdev->config_vector = VIRTIO_PCI_QUEUE_MAX;
+ return 0;
+}
+
+static int virtio_ccw_cb(SubchDev *sch, CCW1 ccw)
+{
+ int ret;
+ VqInfoBlock info;
+ uint8_t status;
+ VirtioFeatDesc features;
+ void *config;
+ hwaddr indicators;
+ VqConfigBlock vq_config;
+ VirtioCcwDevice *dev = sch->driver_data;
+ bool check_len;
+ int len;
+ hwaddr hw_len;
+
+ if (!dev) {
+ return -EINVAL;
+ }
+
+ trace_virtio_ccw_interpret_ccw(sch->cssid, sch->ssid, sch->schid,
+ ccw.cmd_code);
+ check_len = !((ccw.flags & CCW_FLAG_SLI) && !(ccw.flags & CCW_FLAG_DC));
+
+ /* Look at the command. */
+ switch (ccw.cmd_code) {
+ case CCW_CMD_SET_VQ:
+ if (check_len) {
+ if (ccw.count != sizeof(info)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(info)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ info.queue = ldq_phys(ccw.cda);
+ info.align = ldl_phys(ccw.cda + sizeof(info.queue));
+ info.index = lduw_phys(ccw.cda + sizeof(info.queue)
+ + sizeof(info.align));
+ info.num = lduw_phys(ccw.cda + sizeof(info.queue)
+ + sizeof(info.align)
+ + sizeof(info.index));
+ ret = virtio_ccw_set_vqs(sch, info.queue, info.align, info.index,
+ info.num);
+ sch->curr_status.scsw.count = 0;
+ }
+ break;
+ case CCW_CMD_VDEV_RESET:
+ virtio_reset(dev->vdev);
+ ret = 0;
+ break;
+ case CCW_CMD_READ_FEAT:
+ if (check_len) {
+ if (ccw.count != sizeof(features)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(features)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ features.index = ldub_phys(ccw.cda + sizeof(features.features));
+ if (features.index < ARRAY_SIZE(dev->host_features)) {
+ features.features = dev->host_features[features.index];
+ } else {
+ /* Return zeroes if the guest supports more feature bits. */
+ features.features = 0;
+ }
+ stl_le_phys(ccw.cda, features.features);
+ sch->curr_status.scsw.count = ccw.count - sizeof(features);
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_WRITE_FEAT:
+ if (check_len) {
+ if (ccw.count != sizeof(features)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(features)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ features.index = ldub_phys(ccw.cda + sizeof(features.features));
+ features.features = ldl_le_phys(ccw.cda);
+ if (features.index < ARRAY_SIZE(dev->host_features)) {
+ if (dev->vdev->set_features) {
+ dev->vdev->set_features(dev->vdev, features.features);
+ }
+ dev->vdev->guest_features = features.features;
+ } else {
+ /*
+ * If the guest supports more feature bits, assert that it
+ * passes us zeroes for those we don't support.
+ */
+ if (features.features) {
+ fprintf(stderr, "Guest bug: features[%i]=%x (expected 0)\n",
+ features.index, features.features);
+ /* XXX: do a unit check here? */
+ }
+ }
+ sch->curr_status.scsw.count = ccw.count - sizeof(features);
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_READ_CONF:
+ if (check_len) {
+ if (ccw.count > dev->vdev->config_len) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ len = MIN(ccw.count, dev->vdev->config_len);
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ dev->vdev->get_config(dev->vdev, dev->vdev->config);
+ /* XXX config space endianness */
+ cpu_physical_memory_write(ccw.cda, dev->vdev->config, len);
+ sch->curr_status.scsw.count = ccw.count - len;
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_WRITE_CONF:
+ if (check_len) {
+ if (ccw.count > dev->vdev->config_len) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ len = MIN(ccw.count, dev->vdev->config_len);
+ hw_len = len;
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ config = cpu_physical_memory_map(ccw.cda, &hw_len, 0);
+ if (!config) {
+ ret = -EFAULT;
+ } else {
+ len = hw_len;
+ /* XXX config space endianness */
+ memcpy(dev->vdev->config, config, len);
+ cpu_physical_memory_unmap(config, hw_len, 0, hw_len);
+ if (dev->vdev->set_config) {
+ dev->vdev->set_config(dev->vdev, dev->vdev->config);
+ }
+ sch->curr_status.scsw.count = ccw.count - len;
+ ret = 0;
+ }
+ }
+ break;
+ case CCW_CMD_WRITE_STATUS:
+ if (check_len) {
+ if (ccw.count != sizeof(status)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(status)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ status = ldub_phys(ccw.cda);
+ virtio_set_status(dev->vdev, status);
+ if (dev->vdev->status == 0) {
+ virtio_reset(dev->vdev);
+ }
+ sch->curr_status.scsw.count = ccw.count - sizeof(status);
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_SET_IND:
+ if (check_len) {
+ if (ccw.count != sizeof(indicators)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(indicators)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ indicators = ldq_phys(ccw.cda);
+ if (!indicators) {
+ ret = -EFAULT;
+ } else {
+ dev->indicators = indicators;
+ sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_SET_CONF_IND:
+ if (check_len) {
+ if (ccw.count != sizeof(indicators)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(indicators)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ indicators = ldq_phys(ccw.cda);
+ if (!indicators) {
+ ret = -EFAULT;
+ } else {
+ dev->indicators2 = indicators;
+ sch->curr_status.scsw.count = ccw.count - sizeof(indicators);
+ ret = 0;
+ }
+ break;
+ case CCW_CMD_READ_VQ_CONF:
+ if (check_len) {
+ if (ccw.count != sizeof(vq_config)) {
+ ret = -EINVAL;
+ break;
+ }
+ } else if (ccw.count < sizeof(vq_config)) {
+ /* Can't execute command. */
+ ret = -EINVAL;
+ break;
+ }
+ if (!ccw.cda) {
+ ret = -EFAULT;
+ } else {
+ vq_config.index = lduw_phys(ccw.cda);
+ vq_config.num_max = virtio_queue_get_num(dev->vdev,
+ vq_config.index);
+ stw_phys(ccw.cda + sizeof(vq_config.index), vq_config.num_max);
+ sch->curr_status.scsw.count = ccw.count - sizeof(vq_config);
+ ret = 0;
+ }
+ break;
+ default:
+ ret = -ENOSYS;
+ break;
+ }
+ return ret;
+}
+
+static int virtio_ccw_device_init(VirtioCcwDevice *dev, VirtIODevice *vdev)
+{
+ unsigned int cssid = 0;
+ unsigned int ssid = 0;
+ unsigned int schid;
+ unsigned int devno;
+ bool have_devno = false;
+ bool found = false;
+ SubchDev *sch;
+ int ret;
+ int num;
+ DeviceState *parent = DEVICE(dev);
+
+ sch = g_malloc0(sizeof(SubchDev));
+
+ sch->driver_data = dev;
+ dev->sch = sch;
+
+ dev->vdev = vdev;
+ dev->indicators = 0;
+
+ /* Initialize subchannel structure. */
+ sch->channel_prog = 0x0;
+ sch->last_cmd_valid = false;
+ sch->orb = NULL;
+ /*
+ * Use a device number if provided. Otherwise, fall back to subchannel
+ * number.
+ */
+ if (dev->bus_id) {
+ num = sscanf(dev->bus_id, "%x.%x.%04x", &cssid, &ssid, &devno);
+ if (num == 3) {
+ if ((cssid > MAX_CSSID) || (ssid > MAX_SSID)) {
+ ret = -EINVAL;
+ error_report("Invalid cssid or ssid: cssid %x, ssid %x",
+ cssid, ssid);
+ goto out_err;
+ }
+ /* Enforce use of virtual cssid. */
+ if (cssid != VIRTUAL_CSSID) {
+ ret = -EINVAL;
+ error_report("cssid %x not valid for virtio devices", cssid);
+ goto out_err;
+ }
+ if (css_devno_used(cssid, ssid, devno)) {
+ ret = -EEXIST;
+ error_report("Device %x.%x.%04x already exists", cssid, ssid,
+ devno);
+ goto out_err;
+ }
+ sch->cssid = cssid;
+ sch->ssid = ssid;
+ sch->devno = devno;
+ have_devno = true;
+ } else {
+ ret = -EINVAL;
+ error_report("Malformed devno parameter '%s'", dev->bus_id);
+ goto out_err;
+ }
+ }
+
+ /* Find the next free id. */
+ if (have_devno) {
+ for (schid = 0; schid <= MAX_SCHID; schid++) {
+ if (!css_find_subch(1, cssid, ssid, schid)) {
+ sch->schid = schid;
+ css_subch_assign(cssid, ssid, schid, devno, sch);
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ ret = -ENODEV;
+ error_report("No free subchannel found for %x.%x.%04x", cssid, ssid,
+ devno);
+ goto out_err;
+ }
+ trace_virtio_ccw_new_device(cssid, ssid, schid, devno,
+ "user-configured");
+ } else {
+ cssid = VIRTUAL_CSSID;
+ for (ssid = 0; ssid <= MAX_SSID; ssid++) {
+ for (schid = 0; schid <= MAX_SCHID; schid++) {
+ if (!css_find_subch(1, cssid, ssid, schid)) {
+ sch->cssid = cssid;
+ sch->ssid = ssid;
+ sch->schid = schid;
+ devno = schid;
+ /*
+ * If the devno is already taken, look further in this
+ * subchannel set.
+ */
+ while (css_devno_used(cssid, ssid, devno)) {
+ if (devno == MAX_SCHID) {
+ devno = 0;
+ } else if (devno == schid - 1) {
+ ret = -ENODEV;
+ error_report("No free devno found");
+ goto out_err;
+ } else {
+ devno++;
+ }
+ }
+ sch->devno = devno;
+ css_subch_assign(cssid, ssid, schid, devno, sch);
+ found = true;
+ break;
+ }
+ }
+ if (found) {
+ break;
+ }
+ }
+ if (!found) {
+ ret = -ENODEV;
+ error_report("Virtual channel subsystem is full!");
+ goto out_err;
+ }
+ trace_virtio_ccw_new_device(cssid, ssid, schid, devno,
+ "auto-configured");
+ }
+
+ /* Build initial schib. */
+ css_sch_build_virtual_schib(sch, 0, VIRTIO_CCW_CHPID_TYPE);
+
+ sch->ccw_cb = virtio_ccw_cb;
+
+ /* Build senseid data. */
+ memset(&sch->id, 0, sizeof(SenseId));
+ sch->id.reserved = 0xff;
+ sch->id.cu_type = VIRTIO_CCW_CU_TYPE;
+ sch->id.cu_model = dev->vdev->device_id;
+
+ virtio_bind_device(vdev, &virtio_ccw_bindings, DEVICE(dev));
+ /* Only the first 32 feature bits are used. */
+ dev->host_features[0] = vdev->get_features(vdev, dev->host_features[0]);
+ dev->host_features[0] |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY;
+ dev->host_features[0] |= 0x1 << VIRTIO_F_BAD_FEATURE;
+
+ css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid,
+ parent->hotplugged, 1);
+ return 0;
+
+out_err:
+ dev->sch = NULL;
+ g_free(sch);
+ return ret;
+}
+
+static int virtio_ccw_exit(VirtioCcwDevice *dev)
+{
+ SubchDev *sch = dev->sch;
+
+ if (sch) {
+ css_subch_assign(sch->cssid, sch->ssid, sch->schid, sch->devno, NULL);
+ g_free(sch);
+ }
+ dev->indicators = 0;
+ return 0;
+}
+
+static int virtio_ccw_net_init(VirtioCcwDevice *dev)
+{
+ VirtIODevice *vdev;
+
+ vdev = virtio_net_init((DeviceState *)dev, &dev->nic, &dev->net);
+ if (!vdev) {
+ return -1;
+ }
+
+ return virtio_ccw_device_init(dev, vdev);
+}
+
+static int virtio_ccw_net_exit(VirtioCcwDevice *dev)
+{
+ virtio_net_exit(dev->vdev);
+ return virtio_ccw_exit(dev);
+}
+
+static int virtio_ccw_blk_init(VirtioCcwDevice *dev)
+{
+ VirtIODevice *vdev;
+
+ vdev = virtio_blk_init((DeviceState *)dev, &dev->blk);
+ if (!vdev) {
+ return -1;
+ }
+
+ return virtio_ccw_device_init(dev, vdev);
+}
+
+static int virtio_ccw_blk_exit(VirtioCcwDevice *dev)
+{
+ virtio_blk_exit(dev->vdev);
+ blockdev_mark_auto_del(dev->blk.conf.bs);
+ return virtio_ccw_exit(dev);
+}
+
+static int virtio_ccw_serial_init(VirtioCcwDevice *dev)
+{
+ VirtIODevice *vdev;
+
+ vdev = virtio_serial_init((DeviceState *)dev, &dev->serial);
+ if (!vdev) {
+ return -1;
+ }
+
+ return virtio_ccw_device_init(dev, vdev);
+}
+
+static int virtio_ccw_serial_exit(VirtioCcwDevice *dev)
+{
+ virtio_serial_exit(dev->vdev);
+ return virtio_ccw_exit(dev);
+}
+
+static int virtio_ccw_balloon_init(VirtioCcwDevice *dev)
+{
+ VirtIODevice *vdev;
+
+ vdev = virtio_balloon_init((DeviceState *)dev);
+ if (!vdev) {
+ return -1;
+ }
+
+ return virtio_ccw_device_init(dev, vdev);
+}
+
+static int virtio_ccw_balloon_exit(VirtioCcwDevice *dev)
+{
+ virtio_balloon_exit(dev->vdev);
+ return virtio_ccw_exit(dev);
+}
+
+static int virtio_ccw_scsi_init(VirtioCcwDevice *dev)
+{
+ VirtIODevice *vdev;
+
+ vdev = virtio_scsi_init((DeviceState *)dev, &dev->scsi);
+ if (!vdev) {
+ return -1;
+ }
+
+ return virtio_ccw_device_init(dev, vdev);
+}
+
+static int virtio_ccw_scsi_exit(VirtioCcwDevice *dev)
+{
+ virtio_scsi_exit(dev->vdev);
+ return virtio_ccw_exit(dev);
+}
+
+/* DeviceState to VirtioCcwDevice. Note: used on datapath,
+ * be careful and test performance if you change this.
+ */
+static inline VirtioCcwDevice *to_virtio_ccw_dev_fast(DeviceState *d)
+{
+ return container_of(d, VirtioCcwDevice, parent_obj);
+}
+
+static void virtio_ccw_notify(DeviceState *d, uint16_t vector)
+{
+ VirtioCcwDevice *dev = to_virtio_ccw_dev_fast(d);
+ SubchDev *sch = dev->sch;
+ uint64_t indicators;
+
+ if (vector >= 128) {
+ return;
+ }
+
+ if (vector < VIRTIO_PCI_QUEUE_MAX) {
+ indicators = ldq_phys(dev->indicators);
+ indicators |= 1ULL << vector;
+ stq_phys(dev->indicators, indicators);
+ } else {
+ vector = 0;
+ indicators = ldq_phys(dev->indicators2);
+ indicators |= 1ULL << vector;
+ stq_phys(dev->indicators2, indicators);
+ }
+
+ css_conditional_io_interrupt(sch);
+
+}
+
+static unsigned virtio_ccw_get_features(DeviceState *d)
+{
+ VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
+
+ /* Only the first 32 feature bits are used. */
+ return dev->host_features[0];
+}
+
+static void virtio_ccw_reset(DeviceState *d)
+{
+ VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
+
+ virtio_reset(dev->vdev);
+ css_reset_sch(dev->sch);
+}
+
+/**************** Virtio-ccw Bus Device Descriptions *******************/
+
+static const VirtIOBindings virtio_ccw_bindings = {
+ .notify = virtio_ccw_notify,
+ .get_features = virtio_ccw_get_features,
+};
+
+static Property virtio_ccw_net_properties[] = {
+ DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+ DEFINE_VIRTIO_NET_FEATURES(VirtioCcwDevice, host_features[0]),
+ DEFINE_NIC_PROPERTIES(VirtioCcwDevice, nic),
+ DEFINE_PROP_UINT32("x-txtimer", VirtioCcwDevice,
+ net.txtimer, TX_TIMER_INTERVAL),
+ DEFINE_PROP_INT32("x-txburst", VirtioCcwDevice,
+ net.txburst, TX_BURST),
+ DEFINE_PROP_STRING("tx", VirtioCcwDevice, net.tx),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_net_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+ k->init = virtio_ccw_net_init;
+ k->exit = virtio_ccw_net_exit;
+ dc->reset = virtio_ccw_reset;
+ dc->props = virtio_ccw_net_properties;
+}
+
+static const TypeInfo virtio_ccw_net = {
+ .name = "virtio-net-ccw",
+ .parent = TYPE_VIRTIO_CCW_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_net_class_init,
+};
+
+static Property virtio_ccw_blk_properties[] = {
+ DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+ DEFINE_BLOCK_PROPERTIES(VirtioCcwDevice, blk.conf),
+ DEFINE_PROP_STRING("serial", VirtioCcwDevice, blk.serial),
+#ifdef __linux__
+ DEFINE_PROP_BIT("scsi", VirtioCcwDevice, blk.scsi, 0, true),
+#endif
+ DEFINE_VIRTIO_BLK_FEATURES(VirtioCcwDevice, host_features[0]),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_blk_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+ k->init = virtio_ccw_blk_init;
+ k->exit = virtio_ccw_blk_exit;
+ dc->reset = virtio_ccw_reset;
+ dc->props = virtio_ccw_blk_properties;
+}
+
+static const TypeInfo virtio_ccw_blk = {
+ .name = "virtio-blk-ccw",
+ .parent = TYPE_VIRTIO_CCW_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_blk_class_init,
+};
+
+static Property virtio_ccw_serial_properties[] = {
+ DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+ DEFINE_PROP_UINT32("max_ports", VirtioCcwDevice,
+ serial.max_virtserial_ports, 31),
+ DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_serial_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+ k->init = virtio_ccw_serial_init;
+ k->exit = virtio_ccw_serial_exit;
+ dc->reset = virtio_ccw_reset;
+ dc->props = virtio_ccw_serial_properties;
+}
+
+static const TypeInfo virtio_ccw_serial = {
+ .name = "virtio-serial-ccw",
+ .parent = TYPE_VIRTIO_CCW_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_serial_class_init,
+};
+
+static Property virtio_ccw_balloon_properties[] = {
+ DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+ DEFINE_VIRTIO_COMMON_FEATURES(VirtioCcwDevice, host_features[0]),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_balloon_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+ k->init = virtio_ccw_balloon_init;
+ k->exit = virtio_ccw_balloon_exit;
+ dc->reset = virtio_ccw_reset;
+ dc->props = virtio_ccw_balloon_properties;
+}
+
+static const TypeInfo virtio_ccw_balloon = {
+ .name = "virtio-balloon-ccw",
+ .parent = TYPE_VIRTIO_CCW_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_balloon_class_init,
+};
+
+static Property virtio_ccw_scsi_properties[] = {
+ DEFINE_PROP_STRING("devno", VirtioCcwDevice, bus_id),
+ DEFINE_VIRTIO_SCSI_PROPERTIES(VirtioCcwDevice, host_features[0], scsi),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_ccw_scsi_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ VirtIOCCWDeviceClass *k = VIRTIO_CCW_DEVICE_CLASS(klass);
+
+ k->init = virtio_ccw_scsi_init;
+ k->exit = virtio_ccw_scsi_exit;
+ dc->reset = virtio_ccw_reset;
+ dc->props = virtio_ccw_scsi_properties;
+}
+
+static const TypeInfo virtio_ccw_scsi = {
+ .name = "virtio-scsi-ccw",
+ .parent = TYPE_VIRTIO_CCW_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_scsi_class_init,
+};
+
+static int virtio_ccw_busdev_init(DeviceState *dev)
+{
+ VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+ VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
+
+ virtio_ccw_bus_new(&_dev->bus, _dev);
+
+ return _info->init(_dev);
+}
+
+static int virtio_ccw_busdev_exit(DeviceState *dev)
+{
+ VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+ VirtIOCCWDeviceClass *_info = VIRTIO_CCW_DEVICE_GET_CLASS(dev);
+
+ return _info->exit(_dev);
+}
+
+static int virtio_ccw_busdev_unplug(DeviceState *dev)
+{
+ VirtioCcwDevice *_dev = (VirtioCcwDevice *)dev;
+ SubchDev *sch = _dev->sch;
+
+ /*
+ * We should arrive here only for device_del, since we don't support
+ * direct hot(un)plug of channels, but only through virtio.
+ */
+ assert(sch != NULL);
+ /* Subchannel is now disabled and no longer valid. */
+ sch->curr_status.pmcw.flags &= ~(PMCW_FLAGS_MASK_ENA |
+ PMCW_FLAGS_MASK_DNV);
+
+ css_generate_sch_crws(sch->cssid, sch->ssid, sch->schid, 1, 0);
+
+ object_unparent(OBJECT(dev));
+ qdev_free(dev);
+ return 0;
+}
+
+static void virtio_ccw_device_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ dc->init = virtio_ccw_busdev_init;
+ dc->exit = virtio_ccw_busdev_exit;
+ dc->unplug = virtio_ccw_busdev_unplug;
+ dc->bus_type = TYPE_VIRTUAL_CSS_BUS;
+
+}
+
+static const TypeInfo virtio_ccw_device_info = {
+ .name = TYPE_VIRTIO_CCW_DEVICE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(VirtioCcwDevice),
+ .class_init = virtio_ccw_device_class_init,
+ .class_size = sizeof(VirtIOCCWDeviceClass),
+ .abstract = true,
+};
+
+/***************** Virtual-css Bus Bridge Device ********************/
+/* Only required to have the virtio bus as child in the system bus */
+
+static int virtual_css_bridge_init(SysBusDevice *dev)
+{
+ /* nothing */
+ return 0;
+}
+
+static void virtual_css_bridge_class_init(ObjectClass *klass, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(klass);
+ SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+ k->init = virtual_css_bridge_init;
+ dc->no_user = 1;
+}
+
+static const TypeInfo virtual_css_bridge_info = {
+ .name = "virtual-css-bridge",
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(SysBusDevice),
+ .class_init = virtual_css_bridge_class_init,
+};
+
+/* virtio-ccw-bus */
+
+void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev)
+{
+ DeviceState *qdev = DEVICE(dev);
+ BusState *qbus;
+
+ qbus_create_inplace((BusState *)bus, TYPE_VIRTIO_CCW_BUS, qdev, NULL);
+ qbus = BUS(bus);
+ qbus->allow_hotplug = 0;
+}
+
+static void virtio_ccw_bus_class_init(ObjectClass *klass, void *data)
+{
+ VirtioBusClass *k = VIRTIO_BUS_CLASS(klass);
+ BusClass *bus_class = BUS_CLASS(klass);
+
+ bus_class->max_dev = 1;
+ k->notify = virtio_ccw_notify;
+ k->get_features = virtio_ccw_get_features;
+}
+
+static const TypeInfo virtio_ccw_bus_info = {
+ .name = TYPE_VIRTIO_CCW_BUS,
+ .parent = TYPE_VIRTIO_BUS,
+ .instance_size = sizeof(VirtioCcwBusState),
+ .class_init = virtio_ccw_bus_class_init,
+};
+
+static void virtio_ccw_register(void)
+{
+ type_register_static(&virtio_ccw_bus_info);
+ type_register_static(&virtual_css_bus_info);
+ type_register_static(&virtio_ccw_device_info);
+ type_register_static(&virtio_ccw_serial);
+ type_register_static(&virtio_ccw_blk);
+ type_register_static(&virtio_ccw_net);
+ type_register_static(&virtio_ccw_balloon);
+ type_register_static(&virtio_ccw_scsi);
+ type_register_static(&virtual_css_bridge_info);
+}
+
+type_init(virtio_ccw_register)
diff --git a/hw/s390x/virtio-ccw.h b/hw/s390x/virtio-ccw.h
new file mode 100644
index 0000000000..48474b31ab
--- /dev/null
+++ b/hw/s390x/virtio-ccw.h
@@ -0,0 +1,98 @@
+/*
+ * virtio ccw target definitions
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390X_VIRTIO_CCW_H
+#define HW_S390X_VIRTIO_CCW_H
+
+#include <hw/virtio-blk.h>
+#include <hw/virtio-net.h>
+#include <hw/virtio-serial.h>
+#include <hw/virtio-scsi.h>
+#include <hw/virtio-bus.h>
+
+#define VIRTUAL_CSSID 0xfe
+
+#define VIRTIO_CCW_CU_TYPE 0x3832
+#define VIRTIO_CCW_CHPID_TYPE 0x32
+
+#define CCW_CMD_SET_VQ 0x13
+#define CCW_CMD_VDEV_RESET 0x33
+#define CCW_CMD_READ_FEAT 0x12
+#define CCW_CMD_WRITE_FEAT 0x11
+#define CCW_CMD_READ_CONF 0x22
+#define CCW_CMD_WRITE_CONF 0x21
+#define CCW_CMD_WRITE_STATUS 0x31
+#define CCW_CMD_SET_IND 0x43
+#define CCW_CMD_SET_CONF_IND 0x53
+#define CCW_CMD_READ_VQ_CONF 0x32
+
+#define TYPE_VIRTIO_CCW_DEVICE "virtio-ccw-device"
+#define VIRTIO_CCW_DEVICE(obj) \
+ OBJECT_CHECK(VirtioCcwDevice, (obj), TYPE_VIRTIO_CCW_DEVICE)
+#define VIRTIO_CCW_DEVICE_CLASS(klass) \
+ OBJECT_CLASS_CHECK(VirtIOCCWDeviceClass, (klass), TYPE_VIRTIO_CCW_DEVICE)
+#define VIRTIO_CCW_DEVICE_GET_CLASS(obj) \
+ OBJECT_GET_CLASS(VirtIOCCWDeviceClass, (obj), TYPE_VIRTIO_CCW_DEVICE)
+
+typedef struct VirtioBusState VirtioCcwBusState;
+typedef struct VirtioBusClass VirtioCcwBusClass;
+
+#define TYPE_VIRTIO_CCW_BUS "virtio-ccw-bus"
+#define VIRTIO_CCW_BUS(obj) \
+ OBJECT_CHECK(VirtioCcwBus, (obj), TYPE_VIRTIO_CCW_BUS)
+#define VIRTIO_CCW_BUS_GET_CLASS(obj) \
+ OBJECT_CHECK(VirtioCcwBusState, (obj), TYPE_VIRTIO_CCW_BUS)
+#define VIRTIO_CCW_BUS_CLASS(klass) \
+ OBJECT_CLASS_CHECK(VirtioCcwBusClass, klass, TYPE_VIRTIO_CCW_BUS)
+
+typedef struct VirtioCcwDevice VirtioCcwDevice;
+
+void virtio_ccw_bus_new(VirtioBusState *bus, VirtioCcwDevice *dev);
+
+typedef struct VirtIOCCWDeviceClass {
+ DeviceClass parent_class;
+ int (*init)(VirtioCcwDevice *dev);
+ int (*exit)(VirtioCcwDevice *dev);
+} VirtIOCCWDeviceClass;
+
+/* Change here if we want to support more feature bits. */
+#define VIRTIO_CCW_FEATURE_SIZE 1
+
+struct VirtioCcwDevice {
+ DeviceState parent_obj;
+ SubchDev *sch;
+ VirtIODevice *vdev;
+ char *bus_id;
+ VirtIOBlkConf blk;
+ NICConf nic;
+ uint32_t host_features[VIRTIO_CCW_FEATURE_SIZE];
+ virtio_serial_conf serial;
+ virtio_net_conf net;
+ VirtIOSCSIConf scsi;
+ VirtioBusState bus;
+ /* Guest provided values: */
+ hwaddr indicators;
+ hwaddr indicators2;
+};
+
+/* virtual css bus type */
+typedef struct VirtualCssBus {
+ BusState parent_obj;
+} VirtualCssBus;
+
+#define TYPE_VIRTUAL_CSS_BUS "virtual-css-bus"
+#define VIRTUAL_CSS_BUS(obj) \
+ OBJECT_CHECK(VirtualCssBus, (obj), TYPE_VIRTUAL_CSS_BUS)
+
+VirtualCssBus *virtual_css_bus_init(void);
+void virtio_ccw_device_update_status(SubchDev *sch);
+VirtIODevice *virtio_ccw_get_vdev(SubchDev *sch);
+#endif
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 035a011768..9903f443cb 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1021,6 +1021,7 @@ static void sun4m_hw_init(const struct sun4m_hwdef *hwdef, ram_addr_t RAM_size,
hwdef->ecc_version);
fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
@@ -1665,6 +1666,7 @@ static void sun4d_hw_init(const struct sun4d_hwdef *hwdef, ram_addr_t RAM_size,
"Sun4d");
fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
@@ -1865,6 +1867,7 @@ static void sun4c_hw_init(const struct sun4c_hwdef *hwdef, ram_addr_t RAM_size,
"Sun4c");
fw_cfg = fw_cfg_init(0, 0, CFG_ADDR, CFG_ADDR + 2);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
diff --git a/hw/sun4u.c b/hw/sun4u.c
index b891b84c9c..9fbda29ac4 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -878,6 +878,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem,
(uint8_t *)&nd_table[0].macaddr);
fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
fw_cfg_add_i32(fw_cfg, FW_CFG_ID, 1);
fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, hwdef->machine_id);
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 3040bc63ab..c0a790264c 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -14,6 +14,7 @@
*/
#include "qemu/iov.h"
+#include "qemu/timer.h"
#include "qemu-common.h"
#include "virtio.h"
#include "pc.h"
@@ -22,6 +23,7 @@
#include "virtio-balloon.h"
#include "sysemu/kvm.h"
#include "exec/address-spaces.h"
+#include "qapi/visitor.h"
#if defined(__linux__)
#include <sys/mman.h>
@@ -36,6 +38,9 @@ typedef struct VirtIOBalloon
uint64_t stats[VIRTIO_BALLOON_S_NR];
VirtQueueElement stats_vq_elem;
size_t stats_vq_offset;
+ QEMUTimer *stats_timer;
+ int64_t stats_last_update;
+ int64_t stats_poll_interval;
DeviceState *qdev;
} VirtIOBalloon;
@@ -53,6 +58,16 @@ static void balloon_page(void *addr, int deflate)
#endif
}
+static const char *balloon_stat_names[] = {
+ [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in",
+ [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out",
+ [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults",
+ [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults",
+ [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory",
+ [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory",
+ [VIRTIO_BALLOON_S_NR] = NULL
+};
+
/*
* reset_stats - Mark all items in the stats array as unset
*
@@ -67,6 +82,118 @@ static inline void reset_stats(VirtIOBalloon *dev)
for (i = 0; i < VIRTIO_BALLOON_S_NR; dev->stats[i++] = -1);
}
+static bool balloon_stats_supported(const VirtIOBalloon *s)
+{
+ return s->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ);
+}
+
+static bool balloon_stats_enabled(const VirtIOBalloon *s)
+{
+ return s->stats_poll_interval > 0;
+}
+
+static void balloon_stats_destroy_timer(VirtIOBalloon *s)
+{
+ if (balloon_stats_enabled(s)) {
+ qemu_del_timer(s->stats_timer);
+ qemu_free_timer(s->stats_timer);
+ s->stats_timer = NULL;
+ s->stats_poll_interval = 0;
+ }
+}
+
+static void balloon_stats_change_timer(VirtIOBalloon *s, int secs)
+{
+ qemu_mod_timer(s->stats_timer, qemu_get_clock_ms(vm_clock) + secs * 1000);
+}
+
+static void balloon_stats_poll_cb(void *opaque)
+{
+ VirtIOBalloon *s = opaque;
+
+ if (!balloon_stats_supported(s)) {
+ /* re-schedule */
+ balloon_stats_change_timer(s, s->stats_poll_interval);
+ return;
+ }
+
+ virtqueue_push(s->svq, &s->stats_vq_elem, s->stats_vq_offset);
+ virtio_notify(&s->vdev, s->svq);
+}
+
+static void balloon_stats_get_all(Object *obj, struct Visitor *v,
+ void *opaque, const char *name, Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ int i;
+
+ if (!s->stats_last_update) {
+ error_setg(errp, "guest hasn't updated any stats yet");
+ return;
+ }
+
+ visit_start_struct(v, NULL, "guest-stats", name, 0, errp);
+ visit_type_int(v, &s->stats_last_update, "last-update", errp);
+
+ visit_start_struct(v, NULL, NULL, "stats", 0, errp);
+ for (i = 0; i < VIRTIO_BALLOON_S_NR; i++) {
+ visit_type_int64(v, (int64_t *) &s->stats[i], balloon_stat_names[i],
+ errp);
+ }
+ visit_end_struct(v, errp);
+
+ visit_end_struct(v, errp);
+}
+
+static void balloon_stats_get_poll_interval(Object *obj, struct Visitor *v,
+ void *opaque, const char *name,
+ Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ visit_type_int(v, &s->stats_poll_interval, name, errp);
+}
+
+static void balloon_stats_set_poll_interval(Object *obj, struct Visitor *v,
+ void *opaque, const char *name,
+ Error **errp)
+{
+ VirtIOBalloon *s = opaque;
+ int64_t value;
+
+ visit_type_int(v, &value, name, errp);
+ if (error_is_set(errp)) {
+ return;
+ }
+
+ if (value < 0) {
+ error_setg(errp, "timer value must be greater than zero");
+ return;
+ }
+
+ if (value == s->stats_poll_interval) {
+ return;
+ }
+
+ if (value == 0) {
+ /* timer=0 disables the timer */
+ balloon_stats_destroy_timer(s);
+ return;
+ }
+
+ if (balloon_stats_enabled(s)) {
+ /* timer interval change */
+ s->stats_poll_interval = value;
+ balloon_stats_change_timer(s, value);
+ return;
+ }
+
+ /* create a new timer */
+ g_assert(s->stats_timer == NULL);
+ s->stats_timer = qemu_new_timer_ms(vm_clock, balloon_stats_poll_cb, s);
+ s->stats_poll_interval = value;
+ balloon_stats_change_timer(s, 0);
+}
+
static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOBalloon *s = to_virtio_balloon(vdev);
@@ -107,9 +234,10 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
VirtQueueElement *elem = &s->stats_vq_elem;
VirtIOBalloonStat stat;
size_t offset = 0;
+ qemu_timeval tv;
if (!virtqueue_pop(vq, elem)) {
- return;
+ goto out;
}
/* Initialize the stats to get rid of any stale values. This is only
@@ -128,6 +256,18 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
s->stats[tag] = val;
}
s->stats_vq_offset = offset;
+
+ if (qemu_gettimeofday(&tv) < 0) {
+ fprintf(stderr, "warning: %s: failed to get time of day\n", __func__);
+ goto out;
+ }
+
+ s->stats_last_update = tv.tv_sec;
+
+out:
+ if (balloon_stats_enabled(s)) {
+ balloon_stats_change_timer(s, s->stats_poll_interval);
+ }
}
static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
@@ -164,28 +304,6 @@ static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
{
VirtIOBalloon *dev = opaque;
-
-#if 0
- /* Disable guest-provided stats for now. For more details please check:
- * https://bugzilla.redhat.com/show_bug.cgi?id=623903
- *
- * If you do enable it (which is probably not going to happen as we
- * need a new command for it), remember that you also need to fill the
- * appropriate members of the BalloonInfo structure so that the stats
- * are returned to the client.
- */
- if (dev->vdev.guest_features & (1 << VIRTIO_BALLOON_F_STATS_VQ)) {
- virtqueue_push(dev->svq, &dev->stats_vq_elem, dev->stats_vq_offset);
- virtio_notify(&dev->vdev, dev->svq);
- return;
- }
-#endif
-
- /* Stats are not supported. Clear out any stale values that might
- * have been set by a more featureful guest kernel.
- */
- reset_stats(dev);
-
info->actual = ram_size - ((uint64_t) dev->actual <<
VIRTIO_BALLOON_PFN_SHIFT);
}
@@ -255,12 +373,18 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
s->dvq = virtio_add_queue(&s->vdev, 128, virtio_balloon_handle_output);
s->svq = virtio_add_queue(&s->vdev, 128, virtio_balloon_receive_stats);
- reset_stats(s);
-
s->qdev = dev;
register_savevm(dev, "virtio-balloon", -1, 1,
virtio_balloon_save, virtio_balloon_load, s);
+ object_property_add(OBJECT(dev), "guest-stats", "guest statistics",
+ balloon_stats_get_all, NULL, NULL, s, NULL);
+
+ object_property_add(OBJECT(dev), "guest-stats-polling-interval", "int",
+ balloon_stats_get_poll_interval,
+ balloon_stats_set_poll_interval,
+ NULL, s, NULL);
+
return &s->vdev;
}
@@ -268,6 +392,7 @@ void virtio_balloon_exit(VirtIODevice *vdev)
{
VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+ balloon_stats_destroy_timer(s);
qemu_remove_balloon_handler(s);
unregister_savevm(s->qdev, "virtio-balloon", s);
virtio_cleanup(vdev);
diff --git a/hw/xilinx_ethlite.c b/hw/xilinx_ethlite.c
index 2254851f0a..11dfbc3ac1 100644
--- a/hw/xilinx_ethlite.c
+++ b/hw/xilinx_ethlite.c
@@ -89,7 +89,7 @@ eth_read(void *opaque, hwaddr addr, unsigned int size)
case R_RX_CTRL1:
case R_RX_CTRL0:
r = s->regs[addr];
- D(qemu_log("%s %x=%x\n", __func__, addr * 4, r));
+ D(qemu_log("%s " TARGET_FMT_plx "=%x\n", __func__, addr * 4, r));
break;
default:
@@ -115,7 +115,8 @@ eth_write(void *opaque, hwaddr addr,
if (addr == R_TX_CTRL1)
base = 0x800 / 4;
- D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value));
+ D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n",
+ __func__, addr * 4, value));
if ((value & (CTRL_P | CTRL_S)) == CTRL_S) {
qemu_send_packet(&s->nic->nc,
(void *) &s->regs[base],
@@ -135,12 +136,16 @@ eth_write(void *opaque, hwaddr addr,
break;
/* Keep these native. */
+ case R_RX_CTRL0:
+ case R_RX_CTRL1:
+ if (!(value & CTRL_S)) {
+ qemu_flush_queued_packets(&s->nic->nc);
+ }
case R_TX_LEN0:
case R_TX_LEN1:
case R_TX_GIE0:
- case R_RX_CTRL0:
- case R_RX_CTRL1:
- D(qemu_log("%s addr=%x val=%x\n", __func__, addr * 4, value));
+ D(qemu_log("%s addr=" TARGET_FMT_plx " val=%x\n",
+ __func__, addr * 4, value));
s->regs[addr] = value;
break;
@@ -163,9 +168,9 @@ static const MemoryRegionOps eth_ops = {
static int eth_can_rx(NetClientState *nc)
{
struct xlx_ethlite *s = DO_UPCAST(NICState, nc, nc)->opaque;
- int r;
- r = !(s->regs[R_RX_CTRL0] & CTRL_S);
- return r;
+ unsigned int rxbase = s->rxbuf * (0x800 / 4);
+
+ return !(s->regs[rxbase + R_RX_CTRL0] & CTRL_S);
}
static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
@@ -182,7 +187,7 @@ static ssize_t eth_rx(NetClientState *nc, const uint8_t *buf, size_t size)
return -1;
}
- D(qemu_log("%s %d rxbase=%x\n", __func__, size, rxbase));
+ D(qemu_log("%s %zd rxbase=%x\n", __func__, size, rxbase));
memcpy(&s->regs[rxbase + R_RX_BUF0], buf, size);
s->regs[rxbase + R_RX_CTRL0] |= CTRL_S;
diff --git a/include/block/block.h b/include/block/block.h
index ffd193637d..5c3b911c1b 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -309,6 +309,10 @@ int bdrv_get_flags(BlockDriverState *bs);
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
void bdrv_get_backing_filename(BlockDriverState *bs,
@@ -351,13 +355,12 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-#define BDRV_SECTORS_PER_DIRTY_CHUNK 2048
-
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
+struct HBitmapIter;
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
int64_t bdrv_get_dirty_count(BlockDriverState *bs);
void bdrv_enable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f83ffb8a08..f7279b978a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -32,6 +32,7 @@
#include "qapi-types.h"
#include "qapi/qmp/qerror.h"
#include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
#define BLOCK_FLAG_ENCRYPT 1
#define BLOCK_FLAG_COMPAT6 4
@@ -275,8 +276,7 @@ struct BlockDriverState {
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
char device_name[32];
- unsigned long *dirty_bitmap;
- int64_t dirty_count;
+ HBitmap *dirty_bitmap;
int in_use; /* users other than guest access, eg. block migration */
QTAILQ_ENTRY(BlockDriverState) list;
@@ -344,6 +344,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs: Block device to operate on.
* @target: Block device to write to.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
* @mode: Whether to collapse all images in the chain to the target.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
@@ -357,8 +359,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
* @bs will be switched to read from @target.
*/
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, MirrorSyncMode mode,
- BlockdevOnError on_source_error,
+ int64_t speed, int64_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index ca464bb367..af2379ff38 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -68,6 +68,9 @@
#if !defined(ECANCELED)
#define ECANCELED 4097
#endif
+#if !defined(EMEDIUMTYPE)
+#define EMEDIUMTYPE 4098
+#endif
#ifndef TIME_MAX
#define TIME_MAX LONG_MAX
#endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
new file mode 100644
index 0000000000..73f5d1d8d3
--- /dev/null
+++ b/include/qemu/hbitmap.h
@@ -0,0 +1,208 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef HBITMAP_H
+#define HBITMAP_H 1
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "bitops.h"
+
+typedef struct HBitmap HBitmap;
+typedef struct HBitmapIter HBitmapIter;
+
+#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6)
+
+/* For 32-bit, the largest that fits in a 4 GiB address space.
+ * For 64-bit, the number of sectors in 1 PiB. Good luck, in
+ * either case... :)
+ */
+#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41)
+
+/* We need to place a sentinel in level 0 to speed up iteration. Thus,
+ * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The
+ * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE
+ * is an exact multiple of BITS_PER_LEVEL.
+ */
+#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1)
+
+struct HBitmapIter {
+ const HBitmap *hb;
+
+ /* Copied from hb for access in the inline functions (hb is opaque). */
+ int granularity;
+
+ /* Entry offset into the last-level array of longs. */
+ size_t pos;
+
+ /* The currently-active path in the tree. Each item of cur[i] stores
+ * the bits (i.e. the subtrees) yet to be processed under that node.
+ */
+ unsigned long cur[HBITMAP_LEVELS];
+};
+
+/**
+ * hbitmap_alloc:
+ * @size: Number of bits in the bitmap.
+ * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity
+ * bits will be represented by a single bit. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire set; iteration will only visit the first
+ * bit of each group.
+ *
+ * Allocate a new HBitmap.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+
+/**
+ * hbitmap_empty:
+ * @hb: HBitmap to operate on.
+ *
+ * Return whether the bitmap is empty.
+ */
+bool hbitmap_empty(const HBitmap *hb);
+
+/**
+ * hbitmap_granularity:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the granularity of the HBitmap.
+ */
+int hbitmap_granularity(const HBitmap *hb);
+
+/**
+ * hbitmap_count:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the number of bits set in the HBitmap.
+ */
+uint64_t hbitmap_count(const HBitmap *hb);
+
+/**
+ * hbitmap_set:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to set (0-based).
+ * @count: Number of bits to set.
+ *
+ * Set a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_reset:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to reset (0-based).
+ * @count: Number of bits to reset.
+ *
+ * Reset a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_get:
+ * @hb: HBitmap to operate on.
+ * @item: Bit to query (0-based).
+ *
+ * Return whether the @item-th bit in an HBitmap is set.
+ */
+bool hbitmap_get(const HBitmap *hb, uint64_t item);
+
+/**
+ * hbitmap_free:
+ * @hb: HBitmap to operate on.
+ *
+ * Free an HBitmap and all of its associated memory.
+ */
+void hbitmap_free(HBitmap *hb);
+
+/**
+ * hbitmap_iter_init:
+ * @hbi: HBitmapIter to initialize.
+ * @hb: HBitmap to iterate on.
+ * @first: First bit to visit (0-based, must be strictly less than the
+ * size of the bitmap).
+ *
+ * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return
+ * the lowest-numbered bit that is set in @hb, starting at @first.
+ *
+ * Concurrent setting of bits is acceptable, and will at worst cause the
+ * iteration to miss some of those bits. Resetting bits before the current
+ * position of the iterator is also okay. However, concurrent resetting of
+ * bits can lead to unexpected behavior if the iterator has not yet reached
+ * those bits.
+ */
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
+
+/* hbitmap_iter_skip_words:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
+
+/**
+ * hbitmap_iter_next:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Return the next bit that is set in @hbi's associated HBitmap,
+ * or -1 if all remaining bits are zero.
+ */
+static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+ int64_t item;
+
+ if (cur == 0) {
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next bit. */
+ hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ffsl(cur) - 1;
+
+ return item << hbi->granularity;
+}
+
+/**
+ * hbitmap_iter_next_word:
+ * @hbi: HBitmapIter to operate on.
+ * @p_cur: Location where to store the next non-zero word.
+ *
+ * Return the index of the next nonzero word that is set in @hbi's
+ * associated HBitmap, and set *p_cur to the content of that word
+ * (bits before the index that was passed to hbitmap_iter_init are
+ * trimmed on the first call). Return -1, and set *p_cur to zero,
+ * if all remaining words are zero.
+ */
+static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+
+ if (cur == 0) {
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ *p_cur = 0;
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next word. */
+ hbi->cur[HBITMAP_LEVELS - 1] = 0;
+ *p_cur = cur;
+ return hbi->pos;
+}
+
+
+#endif
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 81c9a754ae..2a32be4cc0 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -26,6 +26,7 @@
#define HOST_UTILS_H 1
#include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */
+#include <string.h> /* ffsl */
#if defined(__x86_64__)
#define __HAVE_FAST_MULU64__
@@ -237,4 +238,29 @@ static inline int ctpop64(uint64_t val)
#endif
}
+/* glibc does not provide an inline version of ffsl, so always define
+ * ours. We need to give it a different name, however.
+ */
+#ifdef __GLIBC__
+#define ffsl qemu_ffsl
+#endif
+static inline int ffsl(long val)
+{
+ if (!val) {
+ return 0;
+ }
+
+#if QEMU_GNUC_PREREQ(3, 4)
+ return __builtin_ctzl(val) + 1;
+#else
+ if (sizeof(long) == 4) {
+ return ctz32(val) + 1;
+ } else if (sizeof(long) == 8) {
+ return ctz64(val) + 1;
+ } else {
+ abort();
+ }
+#endif
+}
+
#endif
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 773caf9fa1..46f2247274 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -40,6 +40,8 @@ typedef struct CPUState CPUState;
/**
* CPUClass:
+ * @class_by_name: Callback to map -cpu command line model name to an
+ * instantiatable CPU type.
* @reset: Callback to reset the #CPUState to its initial state.
*
* Represents a CPU family or model.
@@ -49,6 +51,8 @@ typedef struct CPUClass {
DeviceClass parent_class;
/*< public >*/
+ ObjectClass *(*class_by_name)(const char *cpu_model);
+
void (*reset)(CPUState *cpu);
} CPUClass;
@@ -89,10 +93,8 @@ struct CPUState {
bool stop;
bool stopped;
-#if !defined(CONFIG_USER_ONLY)
int kvm_fd;
bool kvm_vcpu_dirty;
-#endif
struct KVMState *kvm_state;
struct kvm_run *kvm_run;
@@ -108,6 +110,17 @@ struct CPUState {
void cpu_reset(CPUState *cpu);
/**
+ * cpu_class_by_name:
+ * @typename: The CPU base type.
+ * @cpu_model: The model string without any parameters.
+ *
+ * Looks up a CPU #ObjectClass matching name @cpu_model.
+ *
+ * Returns: A #CPUClass or %NULL if not matching class is found.
+ */
+ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model);
+
+/**
* qemu_cpu_has_work:
* @cpu: The vCPU to check.
*
diff --git a/include/qom/object.h b/include/qom/object.h
index 8e16ea8a44..48e80ba229 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -691,6 +691,14 @@ ObjectClass *object_class_get_parent(ObjectClass *klass);
const char *object_class_get_name(ObjectClass *klass);
/**
+ * object_class_is_abstract:
+ * @klass: The class to obtain the abstractness for.
+ *
+ * Returns: %true if @klass is abstract, %false otherwise.
+ */
+bool object_class_is_abstract(ObjectClass *klass);
+
+/**
* object_class_by_name:
* @typename: The QOM typename to obtain the class for.
*
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 81bd81773f..f7f6854259 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -13,9 +13,16 @@ void cpu_synchronize_all_post_init(void);
void qtest_clock_warp(int64_t dest);
+#ifndef CONFIG_USER_ONLY
/* vl.c */
extern int smp_cores;
extern int smp_threads;
+#else
+/* *-user doesn't have configurable SMP topology */
+#define smp_cores 1
+#define smp_threads 1
+#endif
+
void set_numa_modes(void);
void set_cpu_log(const char *optarg);
void set_cpu_log_filename(const char *optarg);
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 6bdd51373e..6e6dfb374a 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -36,6 +36,7 @@
#define KVM_FEATURE_ASYNC_PF 0
#define KVM_FEATURE_STEAL_TIME 0
#define KVM_FEATURE_PV_EOI 0
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 0
#endif
extern int kvm_allowed;
@@ -158,7 +159,7 @@ int kvm_update_guest_debug(CPUArchState *env, unsigned long reinject_trap);
int kvm_set_signal_mask(CPUArchState *env, const sigset_t *sigset);
#endif
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr);
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
int kvm_on_sigbus(int code, void *addr);
/* internal API */
@@ -195,6 +196,9 @@ int kvm_arch_init(KVMState *s);
int kvm_arch_init_vcpu(CPUState *cpu);
+/* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
+unsigned long kvm_arch_vcpu_id(CPUState *cpu);
+
void kvm_arch_reset_vcpu(CPUState *cpu);
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
diff --git a/kvm-all.c b/kvm-all.c
index 6278d615b1..04ec2d541a 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -222,7 +222,7 @@ int kvm_init_vcpu(CPUState *cpu)
DPRINTF("kvm_init_vcpu\n");
- ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index);
+ ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
if (ret < 0) {
DPRINTF("kvm_create_vcpu failed\n");
goto err;
@@ -2026,9 +2026,8 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
return 0;
}
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
- CPUState *cpu = ENV_GET_CPU(env);
return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
}
diff --git a/kvm-stub.c b/kvm-stub.c
index 47f8dca7d5..760aadc874 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -112,7 +112,7 @@ int kvm_set_ioeventfd_mmio(int fd, uint32_t adr, uint32_t val, bool assign, uint
return -ENOSYS;
}
-int kvm_on_sigbus_vcpu(CPUArchState *env, int code, void *addr)
+int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
return 1;
}
diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b9e8..3a4817b391 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -325,6 +325,80 @@
{ 'command': 'query-chardev', 'returns': ['ChardevInfo'] }
##
+# @DataFormat:
+#
+# An enumeration of data format.
+#
+# @utf8: The data format is 'utf8'.
+#
+# @base64: The data format is 'base64'.
+#
+# Since: 1.4
+##
+{ 'enum': 'DataFormat'
+ 'data': [ 'utf8', 'base64' ] }
+
+##
+# @memchar-write:
+#
+# Provide writing interface for memchardev. Write data to char
+# device 'memory'.
+#
+# @device: the name of the memory char device.
+#
+# @size: the size to write in bytes.
+#
+# @data: the source data write to memchar.
+#
+# @format: #optional the format of the data write to chardev 'memory',
+# by default is 'utf8'.
+#
+# Returns: Nothing on success
+# If @device is not a valid char device, DeviceNotFound
+#
+# Since: 1.4
+##
+{ 'command': 'memchar-write',
+ 'data': {'device': 'str', 'size': 'int', 'data': 'str',
+ '*format': 'DataFormat'} }
+
+##
+# @MemCharRead
+#
+# Result of QMP command memchar-read.
+#
+# @data: The data read from memchar as string.
+#
+# @count: The numbers of bytes read from.
+#
+# Since: 1.4
+##
+{ 'type': 'MemCharRead',
+ 'data': { 'data': 'str', 'count': 'int' } }
+
+##
+# @memchar-read:
+#
+# Provide read interface for memchardev. Read from the char
+# device 'memory' and return the data.
+#
+# @device: the name of the memory char device.
+#
+# @size: the size to read in bytes.
+#
+# @format: #optional the format of the data want to read from
+# memchardev, by default is 'utf8'.
+#
+# Returns: @MemCharRead
+# If @device is not a valid memchr device, DeviceNotFound
+#
+# Since: 1.4
+##
+{ 'command': 'memchar-read',
+ 'data': {'device': 'str', 'size': 'int', '*format': 'DataFormat'},
+ 'returns': 'MemCharRead' }
+
+##
# @CommandInfo:
#
# Information about a QMP command
@@ -667,10 +741,12 @@
#
# @count: number of dirty bytes according to the dirty bitmap
#
+# @granularity: granularity of the dirty bitmap in bytes (since 1.4)
+#
# Since: 1.3
##
{ 'type': 'BlockDirtyInfo',
- 'data': {'count': 'int'} }
+ 'data': {'count': 'int', 'granularity': 'int'} }
##
# @BlockInfo:
@@ -977,28 +1053,10 @@
#
# @actual: the number of bytes the balloon currently contains
#
-# @mem_swapped_in: #optional number of pages swapped in within the guest
-#
-# @mem_swapped_out: #optional number of pages swapped out within the guest
-#
-# @major_page_faults: #optional number of major page faults within the guest
-#
-# @minor_page_faults: #optional number of minor page faults within the guest
-#
-# @free_mem: #optional amount of memory (in bytes) free in the guest
-#
-# @total_mem: #optional amount of memory (in bytes) visible to the guest
-#
# Since: 0.14.0
#
-# Notes: all current versions of QEMU do not fill out optional information in
-# this structure.
##
-{ 'type': 'BalloonInfo',
- 'data': {'actual': 'int', '*mem_swapped_in': 'int',
- '*mem_swapped_out': 'int', '*major_page_faults': 'int',
- '*minor_page_faults': 'int', '*free_mem': 'int',
- '*total_mem': 'int'} }
+{ 'type': 'BalloonInfo', 'data': {'actual': 'int' } }
##
# @query-balloon:
@@ -1634,6 +1692,14 @@
# (all the disk, only the sectors allocated in the topmost image, or
# only new I/O).
#
+# @granularity: #optional granularity of the dirty bitmap, default is 64K
+# if the image format doesn't have clusters, 4K if the clusters
+# are smaller than that, else the cluster size. Must be a
+# power of 2 between 512 and 64M (since 1.4).
+#
+# @buf-size: #optional maximum amount of data in flight from source to
+# target (since 1.4).
+#
# @on-source-error: #optional the action to take on an error on the source,
# default 'report'. 'stop' and 'enospc' can only be used
# if the block device supports io-status (see BlockInfo).
@@ -1650,7 +1716,8 @@
{ 'command': 'drive-mirror',
'data': { 'device': 'str', 'target': 'str', '*format': 'str',
'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
- '*speed': 'int', '*on-source-error': 'BlockdevOnError',
+ '*speed': 'int', '*granularity': 'uint32',
+ '*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError' } }
##
diff --git a/qemu-char.c b/qemu-char.c
index da1db1d104..ac5d62dd9a 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -98,6 +98,7 @@
#include "ui/qemu-spice.h"
#define READ_BUF_LEN 4096
+#define CBUFF_SIZE 65536
/***********************************************************/
/* character device */
@@ -2643,6 +2644,199 @@ size_t qemu_chr_mem_osize(const CharDriverState *chr)
return d->outbuf_size;
}
+/*********************************************************/
+/*CircularMemory chardev*/
+
+typedef struct {
+ size_t size;
+ size_t prod;
+ size_t cons;
+ uint8_t *cbuf;
+} CirMemCharDriver;
+
+static bool cirmem_chr_is_empty(const CharDriverState *chr)
+{
+ const CirMemCharDriver *d = chr->opaque;
+
+ return d->cons == d->prod;
+}
+
+static size_t qemu_chr_cirmem_count(const CharDriverState *chr)
+{
+ const CirMemCharDriver *d = chr->opaque;
+
+ return (d->prod - d->cons);
+}
+
+static int cirmem_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
+{
+ CirMemCharDriver *d = chr->opaque;
+ int i;
+
+ if (!buf || (len < 0)) {
+ return -1;
+ }
+
+ for (i = 0; i < len; i++ ) {
+ /* Avoid writing the IAC information to the queue. */
+ if ((unsigned char)buf[i] == IAC) {
+ continue;
+ }
+
+ d->cbuf[d->prod++ % d->size] = buf[i];
+ if ((d->prod - d->cons) > d->size) {
+ d->cons = d->prod - d->size;
+ }
+ }
+
+ return 0;
+}
+
+static int cirmem_chr_read(CharDriverState *chr, uint8_t *buf, int len)
+{
+ CirMemCharDriver *d = chr->opaque;
+ int i;
+
+ for (i = 0; i < len && !cirmem_chr_is_empty(chr); i++) {
+ buf[i] = d->cbuf[d->cons++ % d->size];
+ }
+
+ return i;
+}
+
+static void cirmem_chr_close(struct CharDriverState *chr)
+{
+ CirMemCharDriver *d = chr->opaque;
+
+ g_free(d->cbuf);
+ g_free(d);
+ chr->opaque = NULL;
+}
+
+static CharDriverState *qemu_chr_open_cirmemchr(QemuOpts *opts)
+{
+ CharDriverState *chr;
+ CirMemCharDriver *d;
+
+ chr = g_malloc0(sizeof(CharDriverState));
+ d = g_malloc(sizeof(*d));
+
+ d->size = qemu_opt_get_number(opts, "maxcapacity", 0);
+ if (d->size == 0) {
+ d->size = CBUFF_SIZE;
+ }
+
+ /* The size must be power of 2 */
+ if (d->size & (d->size - 1)) {
+ fprintf(stderr, "chardev: size of memory device must be power of 2\n");
+ goto fail;
+ }
+
+ d->prod = 0;
+ d->cons = 0;
+ d->cbuf = g_malloc0(d->size);
+
+ chr->opaque = d;
+ chr->chr_write = cirmem_chr_write;
+ chr->chr_close = cirmem_chr_close;
+
+ return chr;
+
+fail:
+ g_free(d);
+ g_free(chr);
+ return NULL;
+}
+
+static bool qemu_is_chr(const CharDriverState *chr, const char *filename)
+{
+ return strcmp(chr->filename, filename);
+}
+
+void qmp_memchar_write(const char *device, int64_t size,
+ const char *data, bool has_format,
+ enum DataFormat format,
+ Error **errp)
+{
+ CharDriverState *chr;
+ guchar *write_data;
+ int ret;
+ gsize write_count;
+
+ chr = qemu_chr_find(device);
+ if (!chr) {
+ error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ return;
+ }
+
+ if (qemu_is_chr(chr, "memory")) {
+ error_setg(errp,"%s is not memory char device", device);
+ return;
+ }
+
+ write_count = (gsize)size;
+
+ if (has_format && (format == DATA_FORMAT_BASE64)) {
+ write_data = g_base64_decode(data, &write_count);
+ } else {
+ write_data = (uint8_t *)data;
+ }
+
+ ret = cirmem_chr_write(chr, write_data, write_count);
+
+ if (ret < 0) {
+ error_setg(errp, "Failed to write to device %s", device);
+ return;
+ }
+}
+
+MemCharRead *qmp_memchar_read(const char *device, int64_t size,
+ bool has_format, enum DataFormat format,
+ Error **errp)
+{
+ CharDriverState *chr;
+ guchar *read_data;
+ MemCharRead *meminfo;
+ size_t count;
+
+ chr = qemu_chr_find(device);
+ if (!chr) {
+ error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ return NULL;
+ }
+
+ if (qemu_is_chr(chr, "memory")) {
+ error_setg(errp,"%s is not memory char device", device);
+ return NULL;
+ }
+
+ if (size <= 0) {
+ error_setg(errp, "size must be greater than zero");
+ return NULL;
+ }
+
+ meminfo = g_malloc0(sizeof(MemCharRead));
+
+ count = qemu_chr_cirmem_count(chr);
+ if (count == 0) {
+ meminfo->data = g_strdup("");
+ return meminfo;
+ }
+
+ size = size > count ? count : size;
+ read_data = g_malloc0(size + 1);
+
+ meminfo->count = cirmem_chr_read(chr, read_data, size);
+
+ if (has_format && (format == DATA_FORMAT_BASE64)) {
+ meminfo->data = g_base64_encode(read_data, (size_t)meminfo->count);
+ } else {
+ meminfo->data = (char *)read_data;
+ }
+
+ return meminfo;
+}
+
QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
{
char host[65], port[33], width[8], height[8];
@@ -2697,6 +2891,11 @@ QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename)
qemu_opt_set(opts, "path", filename);
return opts;
}
+ if (strstart(filename, "memory", &p)) {
+ qemu_opt_set(opts, "backend", "memory");
+ qemu_opt_set(opts, "maxcapacity", p);
+ return opts;
+ }
if (strstart(filename, "file:", &p)) {
qemu_opt_set(opts, "backend", "file");
qemu_opt_set(opts, "path", p);
@@ -2796,6 +2995,7 @@ static const struct {
{ .name = "udp", .open = qemu_chr_open_udp },
{ .name = "msmouse", .open = qemu_chr_open_msmouse },
{ .name = "vc", .open = text_console_init },
+ { .name = "memory", .open = qemu_chr_open_cirmemchr },
#ifdef _WIN32
{ .name = "file", .open = qemu_chr_open_win_file_out },
{ .name = "pipe", .open = qemu_chr_open_win_pipe },
@@ -3055,6 +3255,9 @@ QemuOptsList qemu_chardev_opts = {
},{
.name = "debug",
.type = QEMU_OPT_NUMBER,
+ },{
+ .name = "maxcapacity",
+ .type = QEMU_OPT_NUMBER,
},
{ /* end of list */ }
},
diff --git a/qemu-options.hx b/qemu-options.hx
index 4e2b4994a2..2d44137bf9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1736,6 +1736,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
"-chardev msmouse,id=id[,mux=on|off]\n"
"-chardev vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n"
" [,mux=on|off]\n"
+ "-chardev memory,id=id,maxcapacity=maxcapacity\n"
"-chardev file,id=id,path=path[,mux=on|off]\n"
"-chardev pipe,id=id,path=path[,mux=on|off]\n"
#ifdef _WIN32
@@ -1777,6 +1778,7 @@ Backend is one of:
@option{udp},
@option{msmouse},
@option{vc},
+@option{memory},
@option{file},
@option{pipe},
@option{console},
@@ -1885,6 +1887,14 @@ the console, in pixels.
@option{cols} and @option{rows} specify that the console be sized to fit a text
console with the given dimensions.
+@item -chardev memory ,id=@var{id} ,maxcapacity=@var{maxcapacity}
+
+Create a circular buffer with fixed size indicated by optionally @option{maxcapacity}
+which will be default 64K if it is not given.
+
+@option{maxcapacity} specifies the max capacity of the size of circular buffer
+to create. Should be power of 2.
+
@item -chardev file ,id=@var{id} ,path=@var{path}
Log all traffic received from the guest to a file.
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0ad73f3430..7a0202eb2a 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -611,13 +611,14 @@ int64_t qmp_guest_fsfreeze_thaw(Error **err)
static void guest_fsfreeze_cleanup(void)
{
- int64_t ret;
Error *err = NULL;
if (ga_is_frozen(ga_state) == GUEST_FSFREEZE_STATUS_FROZEN) {
- ret = qmp_guest_fsfreeze_thaw(&err);
- if (ret < 0 || err) {
- slog("failed to clean up frozen filesystems");
+ qmp_guest_fsfreeze_thaw(&err);
+ if (err) {
+ slog("failed to clean up frozen filesystems: %s",
+ error_get_pretty(err));
+ error_free(err);
}
}
}
@@ -934,9 +935,11 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
error_setg_errno(errp, errno,
"failed to get MAC address of %s",
ifa->ifa_name);
+ close(sock);
goto error;
}
+ close(sock);
mac_addr = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
info->value->hardware_address =
@@ -946,20 +949,19 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
(int) mac_addr[4], (int) mac_addr[5]);
info->value->has_hardware_address = true;
- close(sock);
}
if (ifa->ifa_addr &&
ifa->ifa_addr->sa_family == AF_INET) {
/* interface with IPv4 address */
- address_item = g_malloc0(sizeof(*address_item));
- address_item->value = g_malloc0(sizeof(*address_item->value));
p = &((struct sockaddr_in *)ifa->ifa_addr)->sin_addr;
if (!inet_ntop(AF_INET, p, addr4, sizeof(addr4))) {
error_setg_errno(errp, errno, "inet_ntop failed");
goto error;
}
+ address_item = g_malloc0(sizeof(*address_item));
+ address_item->value = g_malloc0(sizeof(*address_item->value));
address_item->value->ip_address = g_strdup(addr4);
address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV4;
@@ -972,14 +974,14 @@ GuestNetworkInterfaceList *qmp_guest_network_get_interfaces(Error **errp)
} else if (ifa->ifa_addr &&
ifa->ifa_addr->sa_family == AF_INET6) {
/* interface with IPv6 address */
- address_item = g_malloc0(sizeof(*address_item));
- address_item->value = g_malloc0(sizeof(*address_item->value));
p = &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr;
if (!inet_ntop(AF_INET6, p, addr6, sizeof(addr6))) {
error_setg_errno(errp, errno, "inet_ntop failed");
goto error;
}
+ address_item = g_malloc0(sizeof(*address_item));
+ address_item->value = g_malloc0(sizeof(*address_item->value));
address_item->value->ip_address = g_strdup(addr6);
address_item->value->ip_address_type = GUEST_IP_ADDRESS_TYPE_IPV6;
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cbf12804be..f58a8411ea 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -466,6 +466,72 @@ Note: inject-nmi fails when the guest doesn't support injecting.
EQMP
{
+ .name = "memchar-write",
+ .args_type = "device:s,size:i,data:s,format:s?",
+ .mhandler.cmd_new = qmp_marshal_input_memchar_write,
+ },
+
+SQMP
+memchar-write
+-------------
+
+Provide writing interface for CirMemCharDriver. Write data to memory
+char device.
+
+Arguments:
+
+- "device": the name of the char device, must be unique (json-string)
+- "size": the memory size, in bytes, should be power of 2 (json-int)
+- "data": the source data write to memory (json-string)
+- "format": the data format write to memory, default is
+ utf8. (json-string, optional)
+ - Possible values: "utf8", "base64"
+
+Example:
+
+-> { "execute": "memchar-write",
+ "arguments": { "device": foo,
+ "size": 8,
+ "data": "abcdefgh",
+ "format": "utf8" } }
+<- { "return": {} }
+
+EQMP
+
+ {
+ .name = "memchar-read",
+ .args_type = "device:s,size:i,format:s?",
+ .mhandler.cmd_new = qmp_marshal_input_memchar_read,
+ },
+
+SQMP
+memchar-read
+-------------
+
+Provide read interface for CirMemCharDriver. Read from the char
+device memory and return the data with size.
+
+Arguments:
+
+- "device": the name of the char device, must be unique (json-string)
+- "size": the memory size wanted to read in bytes (refer to unencoded
+ size of the raw data), would adjust to the init size of the
+ memchar if the requested size is larger than it. (json-int)
+- "format": the data format write to memchardev, default is
+ utf8. (json-string, optional)
+ - Possible values: "utf8", "base64"
+
+Example:
+
+-> { "execute": "memchar-read",
+ "arguments": { "device": foo,
+ "size": 1000,
+ "format": "utf8" } }
+<- { "return": { "data": "data string...", "count": 1000 } }
+
+EQMP
+
+ {
.name = "xen-save-devices-state",
.args_type = "filename:F",
.mhandler.cmd_new = qmp_marshal_input_xen_save_devices_state,
@@ -938,7 +1004,8 @@ EQMP
{
.name = "drive-mirror",
.args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
- "on-source-error:s?,on-target-error:s?",
+ "on-source-error:s?,on-target-error:s?,"
+ "granularity:i?,buf-size:i?",
.mhandler.cmd_new = qmp_marshal_input_drive_mirror,
},
@@ -962,6 +1029,9 @@ Arguments:
file/device (NewImageMode, optional, default 'absolute-paths')
- "speed": maximum speed of the streaming job, in bytes per second
(json-int)
+- "granularity": granularity of the dirty bitmap, in bytes (json-int, optional)
+- "buf_size": maximum amount of data in flight from source to target, in bytes
+ (json-int, default 10M)
- "sync": what parts of the disk image should be copied to the destination;
possibilities include "full" for all the disk, "top" for only the sectors
allocated in the topmost image, or "none" to only replicate new I/O
@@ -971,6 +1041,10 @@ Arguments:
- "on-target-error": the action to take on an error on the target
(BlockdevOnError, default 'report')
+The default value of the granularity is the image cluster size clamped
+between 4096 and 65536, if the image format defines one. If the format
+does not define a cluster size, the default value of the granularity
+is 65536.
Example:
@@ -2549,13 +2623,6 @@ Make an asynchronous request for balloon info. When the request completes a
json-object will be returned containing the following data:
- "actual": current balloon value in bytes (json-int)
-- "mem_swapped_in": Amount of memory swapped in bytes (json-int, optional)
-- "mem_swapped_out": Amount of memory swapped out in bytes (json-int, optional)
-- "major_page_faults": Number of major faults (json-int, optional)
-- "minor_page_faults": Number of minor faults (json-int, optional)
-- "free_mem": Total amount of free and unused memory in
- bytes (json-int, optional)
-- "total_mem": Total amount of available memory in bytes (json-int, optional)
Example:
@@ -2563,12 +2630,6 @@ Example:
<- {
"return":{
"actual":1073741824,
- "mem_swapped_in":0,
- "mem_swapped_out":0,
- "major_page_faults":142,
- "minor_page_faults":239245,
- "free_mem":1014185984,
- "total_mem":1044668416
}
}
diff --git a/qom/cpu.c b/qom/cpu.c
index 49e5134ea1..8fb538bf3b 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -34,11 +34,24 @@ static void cpu_common_reset(CPUState *cpu)
{
}
+ObjectClass *cpu_class_by_name(const char *typename, const char *cpu_model)
+{
+ CPUClass *cc = CPU_CLASS(object_class_by_name(typename));
+
+ return cc->class_by_name(cpu_model);
+}
+
+static ObjectClass *cpu_common_class_by_name(const char *cpu_model)
+{
+ return NULL;
+}
+
static void cpu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
CPUClass *k = CPU_CLASS(klass);
+ k->class_by_name = cpu_common_class_by_name;
k->reset = cpu_common_reset;
dc->no_user = 1;
}
diff --git a/qom/object.c b/qom/object.c
index 03e6f24d28..e200282172 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -501,6 +501,11 @@ ObjectClass *object_get_class(Object *obj)
return obj->class;
}
+bool object_class_is_abstract(ObjectClass *klass)
+{
+ return klass->type->abstract;
+}
+
const char *object_class_get_name(ObjectClass *klass)
{
return klass->type->name;
diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap
index cbe6440ba3..0b23f7795a 100755
--- a/scripts/kvm/vmxcap
+++ b/scripts/kvm/vmxcap
@@ -147,6 +147,7 @@ controls = [
5: 'Enable VPID',
6: 'WBINVD exiting',
7: 'Unrestricted guest',
+ 9: 'Virtual interrupt delivery',
10: 'PAUSE-loop exiting',
11: 'RDRAND exiting',
12: 'Enable INVPCID',
diff --git a/target-alpha/cpu.c b/target-alpha/cpu.c
index 40e980933f..0ad69f0859 100644
--- a/target-alpha/cpu.c
+++ b/target-alpha/cpu.c
@@ -96,14 +96,15 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
}
oc = object_class_by_name(cpu_model);
- if (oc != NULL) {
+ if (oc != NULL && object_class_dynamic_cast(oc, TYPE_ALPHA_CPU) != NULL &&
+ !object_class_is_abstract(oc)) {
return oc;
}
for (i = 0; i < ARRAY_SIZE(alpha_cpu_aliases); i++) {
if (strcmp(cpu_model, alpha_cpu_aliases[i].alias) == 0) {
oc = object_class_by_name(alpha_cpu_aliases[i].typename);
- assert(oc != NULL);
+ assert(oc != NULL && !object_class_is_abstract(oc));
return oc;
}
}
@@ -111,6 +112,9 @@ static ObjectClass *alpha_cpu_class_by_name(const char *cpu_model)
typename = g_strdup_printf("%s-" TYPE_ALPHA_CPU, cpu_model);
oc = object_class_by_name(typename);
g_free(typename);
+ if (oc != NULL && object_class_is_abstract(oc)) {
+ oc = NULL;
+ }
return oc;
}
@@ -244,6 +248,13 @@ static void alpha_cpu_initfn(Object *obj)
env->fen = 1;
}
+static void alpha_cpu_class_init(ObjectClass *oc, void *data)
+{
+ CPUClass *cc = CPU_CLASS(oc);
+
+ cc->class_by_name = alpha_cpu_class_by_name;
+}
+
static const TypeInfo alpha_cpu_type_info = {
.name = TYPE_ALPHA_CPU,
.parent = TYPE_CPU,
@@ -251,6 +262,7 @@ static const TypeInfo alpha_cpu_type_info = {
.instance_init = alpha_cpu_initfn,
.abstract = true,
.class_size = sizeof(AlphaCPUClass),
+ .class_init = alpha_cpu_class_init,
};
static void alpha_cpu_register_types(void)
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 07588a13b2..d1a4c82680 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -201,6 +201,22 @@ void arm_cpu_realize(ARMCPU *cpu)
/* CPU models */
+static ObjectClass *arm_cpu_class_by_name(const char *cpu_model)
+{
+ ObjectClass *oc;
+
+ if (!cpu_model) {
+ return NULL;
+ }
+
+ oc = object_class_by_name(cpu_model);
+ if (!oc || !object_class_dynamic_cast(oc, TYPE_ARM_CPU) ||
+ object_class_is_abstract(oc)) {
+ return NULL;
+ }
+ return oc;
+}
+
static void arm926_initfn(Object *obj)
{
ARMCPU *cpu = ARM_CPU(obj);
@@ -766,6 +782,8 @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
acc->parent_reset = cc->reset;
cc->reset = arm_cpu_reset;
+
+ cc->class_by_name = arm_cpu_class_by_name;
}
static void cpu_register(const ARMCPUInfo *info)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 37c34a11c4..7a10fddf25 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1262,12 +1262,14 @@ ARMCPU *cpu_arm_init(const char *cpu_model)
{
ARMCPU *cpu;
CPUARMState *env;
+ ObjectClass *oc;
static int inited = 0;
- if (!object_class_by_name(cpu_model)) {
+ oc = cpu_class_by_name(TYPE_ARM_CPU, cpu_model);
+ if (!oc) {
return NULL;
}
- cpu = ARM_CPU(object_new(cpu_model));
+ cpu = ARM_CPU(object_new(object_class_get_name(oc)));
env = &cpu->env;
env->cpu_model_str = cpu_model;
arm_cpu_realize(cpu);
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 376d4c8737..5c108e17ab 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -23,6 +23,8 @@
#include "cpu.h"
#include "sysemu/kvm.h"
+#include "sysemu/cpus.h"
+#include "topology.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
@@ -45,6 +47,18 @@
#include "hw/apic_internal.h"
#endif
+static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
+ uint32_t vendor2, uint32_t vendor3)
+{
+ int i;
+ for (i = 0; i < 4; i++) {
+ dst[i] = vendor1 >> (8 * i);
+ dst[i + 4] = vendor2 >> (8 * i);
+ dst[i + 8] = vendor3 >> (8 * i);
+ }
+ dst[CPUID_VENDOR_SZ] = '\0';
+}
+
/* feature flags taken from "Intel Processor Identification and the CPUID
* Instruction" and AMD's "CPUID Specification". In cases of disagreement
* between feature naming conventions, aliases may be added.
@@ -206,22 +220,17 @@ typedef struct model_features_t {
int check_cpuid = 0;
int enforce_cpuid = 0;
-#if defined(CONFIG_KVM)
static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
(1 << KVM_FEATURE_NOP_IO_DELAY) |
(1 << KVM_FEATURE_CLOCKSOURCE2) |
(1 << KVM_FEATURE_ASYNC_PF) |
(1 << KVM_FEATURE_STEAL_TIME) |
+ (1 << KVM_FEATURE_PV_EOI) |
(1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
-static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI);
-#else
-static uint32_t kvm_default_features = 0;
-static const uint32_t kvm_pv_eoi_features = 0;
-#endif
-void enable_kvm_pv_eoi(void)
+void disable_kvm_pv_eoi(void)
{
- kvm_default_features |= kvm_pv_eoi_features;
+ kvm_default_features &= ~(1UL << KVM_FEATURE_PV_EOI);
}
void host_cpuid(uint32_t function, uint32_t count,
@@ -338,19 +347,17 @@ static void add_flagname_to_bitmaps(const char *flagname,
}
typedef struct x86_def_t {
- struct x86_def_t *next;
const char *name;
uint32_t level;
- uint32_t vendor1, vendor2, vendor3;
+ /* vendor is zero-terminated, 12 character ASCII string */
+ char vendor[CPUID_VENDOR_SZ + 1];
int family;
int model;
int stepping;
- int tsc_khz;
uint32_t features, ext_features, ext2_features, ext3_features;
uint32_t kvm_features, svm_features;
uint32_t xlevel;
char model_id[48];
- int vendor_override;
/* Store the results of Centaur's CPUID instructions */
uint32_t ext4_features;
uint32_t xlevel2;
@@ -396,19 +403,13 @@ typedef struct x86_def_t {
#define TCG_SVM_FEATURES 0
#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
-/* maintains list of cpu model definitions
- */
-static x86_def_t *x86_defs = {NULL};
-
-/* built-in cpu model definitions (deprecated)
+/* built-in CPU model definitions
*/
static x86_def_t builtin_x86_defs[] = {
{
.name = "qemu64",
.level = 4,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 6,
.model = 2,
.stepping = 3,
@@ -425,9 +426,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "phenom",
.level = 5,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 16,
.model = 2,
.stepping = 3,
@@ -453,9 +452,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "core2duo",
.level = 10,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 15,
.stepping = 11,
@@ -474,9 +471,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "kvm64",
.level = 5,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 15,
.model = 6,
.stepping = 1,
@@ -500,9 +495,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "qemu32",
.level = 4,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 3,
.stepping = 3,
@@ -513,9 +506,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "kvm32",
.level = 5,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 15,
.model = 6,
.stepping = 1,
@@ -530,9 +521,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "coreduo",
.level = 10,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 14,
.stepping = 8,
@@ -548,9 +537,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "486",
.level = 1,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 4,
.model = 0,
.stepping = 0,
@@ -560,9 +547,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "pentium",
.level = 1,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 5,
.model = 4,
.stepping = 3,
@@ -572,9 +557,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "pentium2",
.level = 2,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 5,
.stepping = 2,
@@ -584,9 +567,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "pentium3",
.level = 2,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 7,
.stepping = 3,
@@ -596,9 +577,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "athlon",
.level = 2,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 6,
.model = 2,
.stepping = 3,
@@ -612,9 +591,7 @@ static x86_def_t builtin_x86_defs[] = {
.name = "n270",
/* original is on level 10 */
.level = 5,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 28,
.stepping = 2,
@@ -633,9 +610,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Conroe",
.level = 2,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 2,
.stepping = 3,
@@ -653,9 +628,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Penryn",
.level = 2,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 2,
.stepping = 3,
@@ -674,9 +647,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Nehalem",
.level = 2,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 2,
.stepping = 3,
@@ -695,9 +666,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Westmere",
.level = 11,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 44,
.stepping = 1,
@@ -717,9 +686,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "SandyBridge",
.level = 0xd,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 42,
.stepping = 1,
@@ -742,9 +709,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Haswell",
.level = 0xd,
- .vendor1 = CPUID_VENDOR_INTEL_1,
- .vendor2 = CPUID_VENDOR_INTEL_2,
- .vendor3 = CPUID_VENDOR_INTEL_3,
+ .vendor = CPUID_VENDOR_INTEL,
.family = 6,
.model = 60,
.stepping = 1,
@@ -772,9 +737,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Opteron_G1",
.level = 5,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 15,
.model = 6,
.stepping = 1,
@@ -796,9 +759,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Opteron_G2",
.level = 5,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 15,
.model = 6,
.stepping = 1,
@@ -822,9 +783,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Opteron_G3",
.level = 5,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 15,
.model = 6,
.stepping = 1,
@@ -850,9 +809,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Opteron_G4",
.level = 0xd,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 21,
.model = 1,
.stepping = 2,
@@ -882,9 +839,7 @@ static x86_def_t builtin_x86_defs[] = {
{
.name = "Opteron_G5",
.level = 0xd,
- .vendor1 = CPUID_VENDOR_AMD_1,
- .vendor2 = CPUID_VENDOR_AMD_2,
- .vendor3 = CPUID_VENDOR_AMD_3,
+ .vendor = CPUID_VENDOR_AMD,
.family = 21,
.model = 2,
.stepping = 0,
@@ -945,9 +900,7 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def)
x86_cpu_def->name = "host";
host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx);
- x86_cpu_def->vendor1 = ebx;
- x86_cpu_def->vendor2 = edx;
- x86_cpu_def->vendor3 = ecx;
+ x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx);
host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx);
x86_cpu_def->family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF);
@@ -972,12 +925,9 @@ static void kvm_cpu_fill_host(x86_def_t *x86_cpu_def)
kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_ECX);
cpu_x86_fill_model_id(x86_cpu_def->model_id);
- x86_cpu_def->vendor_override = 0;
/* Call Centaur's CPUID instruction. */
- if (x86_cpu_def->vendor1 == CPUID_VENDOR_VIA_1 &&
- x86_cpu_def->vendor2 == CPUID_VENDOR_VIA_2 &&
- x86_cpu_def->vendor3 == CPUID_VENDOR_VIA_3) {
+ if (!strcmp(x86_cpu_def->vendor, CPUID_VENDOR_VIA)) {
host_cpuid(0xC0000000, 0, &eax, &ebx, &ecx, &edx);
eax = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);
if (eax >= 0xC0000001) {
@@ -1213,15 +1163,10 @@ static char *x86_cpuid_get_vendor(Object *obj, Error **errp)
X86CPU *cpu = X86_CPU(obj);
CPUX86State *env = &cpu->env;
char *value;
- int i;
value = (char *)g_malloc(CPUID_VENDOR_SZ + 1);
- for (i = 0; i < 4; i++) {
- value[i ] = env->cpuid_vendor1 >> (8 * i);
- value[i + 4] = env->cpuid_vendor2 >> (8 * i);
- value[i + 8] = env->cpuid_vendor3 >> (8 * i);
- }
- value[CPUID_VENDOR_SZ] = '\0';
+ x86_cpu_vendor_words2str(value, env->cpuid_vendor1, env->cpuid_vendor2,
+ env->cpuid_vendor3);
return value;
}
@@ -1246,7 +1191,6 @@ static void x86_cpuid_set_vendor(Object *obj, const char *value,
env->cpuid_vendor2 |= ((uint8_t)value[i + 4]) << (8 * i);
env->cpuid_vendor3 |= ((uint8_t)value[i + 8]) << (8 * i);
}
- env->cpuid_vendor_override = 1;
}
static char *x86_cpuid_get_model_id(Object *obj, Error **errp)
@@ -1320,34 +1264,50 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, void *opaque,
static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *name)
{
x86_def_t *def;
+ int i;
- for (def = x86_defs; def; def = def->next) {
- if (name && !strcmp(name, def->name)) {
- break;
- }
+ if (name == NULL) {
+ return -1;
}
- if (kvm_enabled() && name && strcmp(name, "host") == 0) {
+ if (kvm_enabled() && strcmp(name, "host") == 0) {
kvm_cpu_fill_host(x86_cpu_def);
- } else if (!def) {
- return -1;
- } else {
- memcpy(x86_cpu_def, def, sizeof(*def));
+ return 0;
}
- return 0;
+ for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
+ def = &builtin_x86_defs[i];
+ if (strcmp(name, def->name) == 0) {
+ memcpy(x86_cpu_def, def, sizeof(*def));
+ /* sysenter isn't supported in compatibility mode on AMD,
+ * syscall isn't supported in compatibility mode on Intel.
+ * Normally we advertise the actual CPU vendor, but you can
+ * override this using the 'vendor' property if you want to use
+ * KVM's sysenter/syscall emulation in compatibility mode and
+ * when doing cross vendor migration
+ */
+ if (kvm_enabled()) {
+ uint32_t ebx = 0, ecx = 0, edx = 0;
+ host_cpuid(0, 0, NULL, &ebx, &ecx, &edx);
+ x86_cpu_vendor_words2str(x86_cpu_def->vendor, ebx, edx, ecx);
+ }
+ return 0;
+ }
+ }
+
+ return -1;
}
/* Parse "+feature,-feature,feature=foo" CPU feature string
*/
-static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
+static void cpu_x86_parse_featurestr(X86CPU *cpu, char *features, Error **errp)
{
- unsigned int i;
char *featurestr; /* Single 'key=value" string being parsed */
/* Features to be added */
FeatureWordArray plus_features = { 0 };
/* Features to be removed */
FeatureWordArray minus_features = { 0 };
uint32_t numvalue;
+ CPUX86State *env = &cpu->env;
featurestr = features ? strtok(features, ",") : NULL;
@@ -1360,87 +1320,57 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
} else if ((val = strchr(featurestr, '='))) {
*val = 0; val++;
if (!strcmp(featurestr, "family")) {
- char *err;
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err || numvalue > 0xff + 0xf) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
- }
- x86_cpu_def->family = numvalue;
+ object_property_parse(OBJECT(cpu), val, featurestr, errp);
} else if (!strcmp(featurestr, "model")) {
- char *err;
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err || numvalue > 0xff) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
- }
- x86_cpu_def->model = numvalue;
+ object_property_parse(OBJECT(cpu), val, featurestr, errp);
} else if (!strcmp(featurestr, "stepping")) {
- char *err;
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err || numvalue > 0xf) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
- }
- x86_cpu_def->stepping = numvalue ;
+ object_property_parse(OBJECT(cpu), val, featurestr, errp);
} else if (!strcmp(featurestr, "level")) {
- char *err;
- numvalue = strtoul(val, &err, 0);
- if (!*val || *err) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
- }
- x86_cpu_def->level = numvalue;
+ object_property_parse(OBJECT(cpu), val, featurestr, errp);
} else if (!strcmp(featurestr, "xlevel")) {
char *err;
+ char num[32];
+
numvalue = strtoul(val, &err, 0);
if (!*val || *err) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
+ error_setg(errp, "bad numerical value %s\n", val);
+ goto out;
}
if (numvalue < 0x80000000) {
+ fprintf(stderr, "xlevel value shall always be >= 0x80000000"
+ ", fixup will be removed in future versions\n");
numvalue += 0x80000000;
}
- x86_cpu_def->xlevel = numvalue;
+ snprintf(num, sizeof(num), "%" PRIu32, numvalue);
+ object_property_parse(OBJECT(cpu), num, featurestr, errp);
} else if (!strcmp(featurestr, "vendor")) {
- if (strlen(val) != 12) {
- fprintf(stderr, "vendor string must be 12 chars long\n");
- goto error;
- }
- x86_cpu_def->vendor1 = 0;
- x86_cpu_def->vendor2 = 0;
- x86_cpu_def->vendor3 = 0;
- for(i = 0; i < 4; i++) {
- x86_cpu_def->vendor1 |= ((uint8_t)val[i ]) << (8 * i);
- x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4]) << (8 * i);
- x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8]) << (8 * i);
- }
- x86_cpu_def->vendor_override = 1;
+ object_property_parse(OBJECT(cpu), val, featurestr, errp);
} else if (!strcmp(featurestr, "model_id")) {
- pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
- val);
+ object_property_parse(OBJECT(cpu), val, "model-id", errp);
} else if (!strcmp(featurestr, "tsc_freq")) {
int64_t tsc_freq;
char *err;
+ char num[32];
tsc_freq = strtosz_suffix_unit(val, &err,
STRTOSZ_DEFSUFFIX_B, 1000);
if (tsc_freq < 0 || *err) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
+ error_setg(errp, "bad numerical value %s\n", val);
+ goto out;
}
- x86_cpu_def->tsc_khz = tsc_freq / 1000;
+ snprintf(num, sizeof(num), "%" PRId64, tsc_freq);
+ object_property_parse(OBJECT(cpu), num, "tsc-frequency", errp);
} else if (!strcmp(featurestr, "hv_spinlocks")) {
char *err;
numvalue = strtoul(val, &err, 0);
if (!*val || *err) {
- fprintf(stderr, "bad numerical value %s\n", val);
- goto error;
+ error_setg(errp, "bad numerical value %s\n", val);
+ goto out;
}
hyperv_set_spinlock_retries(numvalue);
} else {
- fprintf(stderr, "unrecognized feature %s\n", featurestr);
- goto error;
+ error_setg(errp, "unrecognized feature %s\n", featurestr);
+ goto out;
}
} else if (!strcmp(featurestr, "check")) {
check_cpuid = 1;
@@ -1451,31 +1381,34 @@ static int cpu_x86_parse_featurestr(x86_def_t *x86_cpu_def, char *features)
} else if (!strcmp(featurestr, "hv_vapic")) {
hyperv_enable_vapic_recommended(true);
} else {
- fprintf(stderr, "feature string `%s' not in format (+feature|-feature|feature=xyz)\n", featurestr);
- goto error;
+ error_setg(errp, "feature string `%s' not in format (+feature|"
+ "-feature|feature=xyz)\n", featurestr);
+ goto out;
+ }
+ if (error_is_set(errp)) {
+ goto out;
}
featurestr = strtok(NULL, ",");
}
- x86_cpu_def->features |= plus_features[FEAT_1_EDX];
- x86_cpu_def->ext_features |= plus_features[FEAT_1_ECX];
- x86_cpu_def->ext2_features |= plus_features[FEAT_8000_0001_EDX];
- x86_cpu_def->ext3_features |= plus_features[FEAT_8000_0001_ECX];
- x86_cpu_def->ext4_features |= plus_features[FEAT_C000_0001_EDX];
- x86_cpu_def->kvm_features |= plus_features[FEAT_KVM];
- x86_cpu_def->svm_features |= plus_features[FEAT_SVM];
- x86_cpu_def->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX];
- x86_cpu_def->features &= ~minus_features[FEAT_1_EDX];
- x86_cpu_def->ext_features &= ~minus_features[FEAT_1_ECX];
- x86_cpu_def->ext2_features &= ~minus_features[FEAT_8000_0001_EDX];
- x86_cpu_def->ext3_features &= ~minus_features[FEAT_8000_0001_ECX];
- x86_cpu_def->ext4_features &= ~minus_features[FEAT_C000_0001_EDX];
- x86_cpu_def->kvm_features &= ~minus_features[FEAT_KVM];
- x86_cpu_def->svm_features &= ~minus_features[FEAT_SVM];
- x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX];
- return 0;
+ env->cpuid_features |= plus_features[FEAT_1_EDX];
+ env->cpuid_ext_features |= plus_features[FEAT_1_ECX];
+ env->cpuid_ext2_features |= plus_features[FEAT_8000_0001_EDX];
+ env->cpuid_ext3_features |= plus_features[FEAT_8000_0001_ECX];
+ env->cpuid_ext4_features |= plus_features[FEAT_C000_0001_EDX];
+ env->cpuid_kvm_features |= plus_features[FEAT_KVM];
+ env->cpuid_svm_features |= plus_features[FEAT_SVM];
+ env->cpuid_7_0_ebx_features |= plus_features[FEAT_7_0_EBX];
+ env->cpuid_features &= ~minus_features[FEAT_1_EDX];
+ env->cpuid_ext_features &= ~minus_features[FEAT_1_ECX];
+ env->cpuid_ext2_features &= ~minus_features[FEAT_8000_0001_EDX];
+ env->cpuid_ext3_features &= ~minus_features[FEAT_8000_0001_ECX];
+ env->cpuid_ext4_features &= ~minus_features[FEAT_C000_0001_EDX];
+ env->cpuid_kvm_features &= ~minus_features[FEAT_KVM];
+ env->cpuid_svm_features &= ~minus_features[FEAT_SVM];
+ env->cpuid_7_0_ebx_features &= ~minus_features[FEAT_7_0_EBX];
-error:
- return -1;
+out:
+ return;
}
/* generate a composite string into buf of all cpuid names in featureset
@@ -1513,8 +1446,10 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
{
x86_def_t *def;
char buf[256];
+ int i;
- for (def = x86_defs; def; def = def->next) {
+ for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
+ def = &builtin_x86_defs[i];
snprintf(buf, sizeof(buf), "%s", def->name);
(*cpu_fprintf)(f, "x86 %16s %-48s\n", buf, def->model_id);
}
@@ -1536,11 +1471,13 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
{
CpuDefinitionInfoList *cpu_list = NULL;
x86_def_t *def;
+ int i;
- for (def = x86_defs; def; def = def->next) {
+ for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) {
CpuDefinitionInfoList *entry;
CpuDefinitionInfo *info;
+ def = &builtin_x86_defs[i];
info = g_malloc0(sizeof(*info));
info->name = g_strdup(def->name);
@@ -1602,18 +1539,12 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
goto out;
}
- def->kvm_features |= kvm_default_features;
+ if (kvm_enabled()) {
+ def->kvm_features |= kvm_default_features;
+ }
def->ext_features |= CPUID_EXT_HYPERVISOR;
- if (cpu_x86_parse_featurestr(def, features) < 0) {
- error_setg(&error, "Invalid cpu_model string format: %s", cpu_model);
- goto out;
- }
- assert(def->vendor1);
- env->cpuid_vendor1 = def->vendor1;
- env->cpuid_vendor2 = def->vendor2;
- env->cpuid_vendor3 = def->vendor3;
- env->cpuid_vendor_override = def->vendor_override;
+ object_property_set_str(OBJECT(cpu), def->vendor, "vendor", &error);
object_property_set_int(OBJECT(cpu), def->level, "level", &error);
object_property_set_int(OBJECT(cpu), def->family, "family", &error);
object_property_set_int(OBJECT(cpu), def->model, "model", &error);
@@ -1628,11 +1559,13 @@ int cpu_x86_register(X86CPU *cpu, const char *cpu_model)
env->cpuid_ext4_features = def->ext4_features;
env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features;
env->cpuid_xlevel2 = def->xlevel2;
- object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
- "tsc-frequency", &error);
object_property_set_str(OBJECT(cpu), def->model_id, "model-id", &error);
+ if (error) {
+ goto out;
+ }
+ cpu_x86_parse_featurestr(cpu, features, &error);
out:
g_strfreev(model_pieces);
if (error) {
@@ -1661,7 +1594,6 @@ void x86_cpudef_setup(void)
for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) {
x86_def_t *def = &builtin_x86_defs[i];
- def->next = x86_defs;
/* Look for specific "cpudef" models that */
/* have the QEMU version in .model_id */
@@ -1674,8 +1606,6 @@ void x86_cpudef_setup(void)
break;
}
}
-
- x86_defs = def;
}
}
@@ -1685,16 +1615,6 @@ static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx,
*ebx = env->cpuid_vendor1;
*edx = env->cpuid_vendor2;
*ecx = env->cpuid_vendor3;
-
- /* sysenter isn't supported on compatibility mode on AMD, syscall
- * isn't supported in compatibility mode on Intel.
- * Normally we advertise the actual cpu vendor, but you can override
- * this if you want to use KVM's sysenter/syscall emulation
- * in compatibility mode and when doing cross vendor migration
- */
- if (kvm_enabled() && ! env->cpuid_vendor_override) {
- host_cpuid(0, 0, NULL, ebx, ecx, edx);
- }
}
void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
@@ -2197,6 +2117,39 @@ void x86_cpu_realize(Object *obj, Error **errp)
cpu_reset(CPU(cpu));
}
+/* Enables contiguous-apic-ID mode, for compatibility */
+static bool compat_apic_id_mode;
+
+void enable_compat_apic_id_mode(void)
+{
+ compat_apic_id_mode = true;
+}
+
+/* Calculates initial APIC ID for a specific CPU index
+ *
+ * Currently we need to be able to calculate the APIC ID from the CPU index
+ * alone (without requiring a CPU object), as the QEMU<->Seabios interfaces have
+ * no concept of "CPU index", and the NUMA tables on fw_cfg need the APIC ID of
+ * all CPUs up to max_cpus.
+ */
+uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index)
+{
+ uint32_t correct_id;
+ static bool warned;
+
+ correct_id = x86_apicid_from_cpu_idx(smp_cores, smp_threads, cpu_index);
+ if (compat_apic_id_mode) {
+ if (cpu_index != correct_id && !warned) {
+ error_report("APIC IDs set in compatibility mode, "
+ "CPU topology won't match the configuration");
+ warned = true;
+ }
+ return cpu_index;
+ } else {
+ return correct_id;
+ }
+}
+
static void x86_cpu_initfn(Object *obj)
{
CPUState *cs = CPU(obj);
@@ -2231,7 +2184,7 @@ static void x86_cpu_initfn(Object *obj)
x86_cpuid_get_tsc_freq,
x86_cpuid_set_tsc_freq, NULL, NULL, NULL);
- env->cpuid_apic_id = cs->cpu_index;
+ env->cpuid_apic_id = x86_cpu_apic_id_from_index(cs->cpu_index);
/* init various static tables used in TCG mode */
if (tcg_enabled() && !inited) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 4e091cdec3..62508dc688 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -537,14 +537,14 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
#define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
#define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
+#define CPUID_VENDOR_INTEL "GenuineIntel"
#define CPUID_VENDOR_AMD_1 0x68747541 /* "Auth" */
#define CPUID_VENDOR_AMD_2 0x69746e65 /* "enti" */
#define CPUID_VENDOR_AMD_3 0x444d4163 /* "cAMD" */
+#define CPUID_VENDOR_AMD "AuthenticAMD"
-#define CPUID_VENDOR_VIA_1 0x746e6543 /* "Cent" */
-#define CPUID_VENDOR_VIA_2 0x48727561 /* "aurH" */
-#define CPUID_VENDOR_VIA_3 0x736c7561 /* "auls" */
+#define CPUID_VENDOR_VIA "CentaurHauls"
#define CPUID_MWAIT_IBE (1 << 1) /* Interrupts can exit capability */
#define CPUID_MWAIT_EMX (1 << 0) /* enumeration supported */
@@ -835,7 +835,6 @@ typedef struct CPUX86State {
uint32_t cpuid_ext2_features;
uint32_t cpuid_ext3_features;
uint32_t cpuid_apic_id;
- int cpuid_vendor_override;
/* Store the results of Centaur's CPUID instructions */
uint32_t cpuid_xlevel2;
uint32_t cpuid_ext4_features;
@@ -1250,9 +1249,12 @@ void do_smm_enter(CPUX86State *env1);
void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
-void enable_kvm_pv_eoi(void);
+void disable_kvm_pv_eoi(void);
/* Return name of 32-bit register, from a R_* constant */
const char *get_register_name_32(unsigned int reg);
+uint32_t x86_cpu_apic_id_from_index(unsigned int cpu_index);
+void enable_compat_apic_id_mode(void);
+
#endif /* CPU_I386_H */
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 3acff40c47..9ebf1816d9 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -411,11 +411,19 @@ static void cpu_update_state(void *opaque, int running, RunState state)
}
}
+unsigned long kvm_arch_vcpu_id(CPUState *cs)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ return cpu->env.cpuid_apic_id;
+}
+
+#define KVM_MAX_CPUID_ENTRIES 100
+
int kvm_arch_init_vcpu(CPUState *cs)
{
struct {
struct kvm_cpuid2 cpuid;
- struct kvm_cpuid_entry2 entries[100];
+ struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
} QEMU_PACKED cpuid_data;
X86CPU *cpu = X86_CPU(cs);
CPUX86State *env = &cpu->env;
@@ -502,6 +510,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);
for (i = 0; i <= limit; i++) {
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "unsupported level value: 0x%x\n", limit);
+ abort();
+ }
c = &cpuid_data.entries[cpuid_i++];
switch (i) {
@@ -516,6 +528,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
times = c->eax & 0xff;
for (j = 1; j < times; ++j) {
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:2):eax & 0xf = 0x%x\n", times);
+ abort();
+ }
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
@@ -544,6 +561,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
if (i == 0xd && c->eax == 0) {
continue;
}
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "cpuid_data is full, no space for "
+ "cpuid(eax:0x%x,ecx:0x%x)\n", i, j);
+ abort();
+ }
c = &cpuid_data.entries[cpuid_i++];
}
break;
@@ -557,6 +579,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);
for (i = 0x80000000; i <= limit; i++) {
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit);
+ abort();
+ }
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
@@ -569,6 +595,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused);
for (i = 0xC0000000; i <= limit; i++) {
+ if (cpuid_i == KVM_MAX_CPUID_ENTRIES) {
+ fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit);
+ abort();
+ }
c = &cpuid_data.entries[cpuid_i++];
c->function = i;
diff --git a/target-i386/topology.h b/target-i386/topology.h
new file mode 100644
index 0000000000..24ed525453
--- /dev/null
+++ b/target-i386/topology.h
@@ -0,0 +1,136 @@
+/*
+ * x86 CPU topology data structures and functions
+ *
+ * Copyright (c) 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef TARGET_I386_TOPOLOGY_H
+#define TARGET_I386_TOPOLOGY_H
+
+/* This file implements the APIC-ID-based CPU topology enumeration logic,
+ * documented at the following document:
+ * Intel® 64 Architecture Processor Topology Enumeration
+ * http://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration/
+ *
+ * This code should be compatible with AMD's "Extended Method" described at:
+ * AMD CPUID Specification (Publication #25481)
+ * Section 3: Multiple Core Calcuation
+ * as long as:
+ * nr_threads is set to 1;
+ * OFFSET_IDX is assumed to be 0;
+ * CPUID Fn8000_0008_ECX[ApicIdCoreIdSize[3:0]] is set to apicid_core_width().
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "qemu/bitops.h"
+
+/* APIC IDs can be 32-bit, but beware: APIC IDs > 255 require x2APIC support
+ */
+typedef uint32_t apic_id_t;
+
+/* Return the bit width needed for 'count' IDs
+ */
+static unsigned apicid_bitwidth_for_count(unsigned count)
+{
+ g_assert(count >= 1);
+ if (count == 1) {
+ return 0;
+ }
+ return bitops_flsl(count - 1) + 1;
+}
+
+/* Bit width of the SMT_ID (thread ID) field on the APIC ID
+ */
+static inline unsigned apicid_smt_width(unsigned nr_cores, unsigned nr_threads)
+{
+ return apicid_bitwidth_for_count(nr_threads);
+}
+
+/* Bit width of the Core_ID field
+ */
+static inline unsigned apicid_core_width(unsigned nr_cores, unsigned nr_threads)
+{
+ return apicid_bitwidth_for_count(nr_cores);
+}
+
+/* Bit offset of the Core_ID field
+ */
+static inline unsigned apicid_core_offset(unsigned nr_cores,
+ unsigned nr_threads)
+{
+ return apicid_smt_width(nr_cores, nr_threads);
+}
+
+/* Bit offset of the Pkg_ID (socket ID) field
+ */
+static inline unsigned apicid_pkg_offset(unsigned nr_cores, unsigned nr_threads)
+{
+ return apicid_core_offset(nr_cores, nr_threads) +
+ apicid_core_width(nr_cores, nr_threads);
+}
+
+/* Make APIC ID for the CPU based on Pkg_ID, Core_ID, SMT_ID
+ *
+ * The caller must make sure core_id < nr_cores and smt_id < nr_threads.
+ */
+static inline apic_id_t apicid_from_topo_ids(unsigned nr_cores,
+ unsigned nr_threads,
+ unsigned pkg_id,
+ unsigned core_id,
+ unsigned smt_id)
+{
+ return (pkg_id << apicid_pkg_offset(nr_cores, nr_threads)) |
+ (core_id << apicid_core_offset(nr_cores, nr_threads)) |
+ smt_id;
+}
+
+/* Calculate thread/core/package IDs for a specific topology,
+ * based on (contiguous) CPU index
+ */
+static inline void x86_topo_ids_from_idx(unsigned nr_cores,
+ unsigned nr_threads,
+ unsigned cpu_index,
+ unsigned *pkg_id,
+ unsigned *core_id,
+ unsigned *smt_id)
+{
+ unsigned core_index = cpu_index / nr_threads;
+ *smt_id = cpu_index % nr_threads;
+ *core_id = core_index % nr_cores;
+ *pkg_id = core_index / nr_cores;
+}
+
+/* Make APIC ID for the CPU 'cpu_index'
+ *
+ * 'cpu_index' is a sequential, contiguous ID for the CPU.
+ */
+static inline apic_id_t x86_apicid_from_cpu_idx(unsigned nr_cores,
+ unsigned nr_threads,
+ unsigned cpu_index)
+{
+ unsigned pkg_id, core_id, smt_id;
+ x86_topo_ids_from_idx(nr_cores, nr_threads, cpu_index,
+ &pkg_id, &core_id, &smt_id);
+ return apicid_from_topo_ids(nr_cores, nr_threads, pkg_id, core_id, smt_id);
+}
+
+#endif /* TARGET_I386_TOPOLOGY_H */
diff --git a/target-m68k/cpu.c b/target-m68k/cpu.c
index ce89674a08..5c7803181d 100644
--- a/target-m68k/cpu.c
+++ b/target-m68k/cpu.c
@@ -55,6 +55,22 @@ static void m68k_cpu_reset(CPUState *s)
/* CPU models */
+static ObjectClass *m68k_cpu_class_by_name(const char *cpu_model)
+{
+ ObjectClass *oc;
+
+ if (cpu_model == NULL) {
+ return NULL;
+ }
+
+ oc = object_class_by_name(cpu_model);
+ if (oc != NULL && (object_class_dynamic_cast(oc, TYPE_M68K_CPU) == NULL ||
+ object_class_is_abstract(oc))) {
+ return NULL;
+ }
+ return oc;
+}
+
static void m5206_cpu_initfn(Object *obj)
{
M68kCPU *cpu = M68K_CPU(obj);
@@ -134,6 +150,8 @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
mcc->parent_reset = cc->reset;
cc->reset = m68k_cpu_reset;
+
+ cc->class_by_name = m68k_cpu_class_by_name;
}
static void register_cpu_type(const M68kCPUInfo *info)
@@ -144,7 +162,7 @@ static void register_cpu_type(const M68kCPUInfo *info)
.instance_init = info->instance_init,
};
- type_register_static(&type_info);
+ type_register(&type_info);
}
static const TypeInfo m68k_cpu_type_info = {
diff --git a/target-m68k/helper.c b/target-m68k/helper.c
index 097fc789d4..f66e12b6ba 100644
--- a/target-m68k/helper.c
+++ b/target-m68k/helper.c
@@ -97,12 +97,14 @@ CPUM68KState *cpu_m68k_init(const char *cpu_model)
{
M68kCPU *cpu;
CPUM68KState *env;
+ ObjectClass *oc;
static int inited;
- if (object_class_by_name(cpu_model) == NULL) {
+ oc = cpu_class_by_name(TYPE_M68K_CPU, cpu_model);
+ if (oc == NULL) {
return NULL;
}
- cpu = M68K_CPU(object_new(cpu_model));
+ cpu = M68K_CPU(object_new(object_class_get_name(oc)));
env = &cpu->env;
if (!inited) {
diff --git a/target-openrisc/cpu.c b/target-openrisc/cpu.c
index 56544d8ab5..54876d904b 100644
--- a/target-openrisc/cpu.c
+++ b/target-openrisc/cpu.c
@@ -88,6 +88,23 @@ static void openrisc_cpu_initfn(Object *obj)
}
/* CPU models */
+
+static ObjectClass *openrisc_cpu_class_by_name(const char *cpu_model)
+{
+ ObjectClass *oc;
+
+ if (cpu_model == NULL) {
+ return NULL;
+ }
+
+ oc = object_class_by_name(cpu_model);
+ if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_OPENRISC_CPU) ||
+ object_class_is_abstract(oc))) {
+ return NULL;
+ }
+ return oc;
+}
+
static void or1200_initfn(Object *obj)
{
OpenRISCCPU *cpu = OPENRISC_CPU(obj);
@@ -120,6 +137,8 @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
occ->parent_reset = cc->reset;
cc->reset = openrisc_cpu_reset;
+
+ cc->class_by_name = openrisc_cpu_class_by_name;
}
static void cpu_register(const OpenRISCCPUInfo *info)
@@ -132,7 +151,7 @@ static void cpu_register(const OpenRISCCPUInfo *info)
.class_size = sizeof(OpenRISCCPUClass),
};
- type_register_static(&type_info);
+ type_register(&type_info);
}
static const TypeInfo openrisc_cpu_type_info = {
@@ -158,11 +177,13 @@ static void openrisc_cpu_register_types(void)
OpenRISCCPU *cpu_openrisc_init(const char *cpu_model)
{
OpenRISCCPU *cpu;
+ ObjectClass *oc;
- if (!object_class_by_name(cpu_model)) {
+ oc = openrisc_cpu_class_by_name(cpu_model);
+ if (oc == NULL) {
return NULL;
}
- cpu = OPENRISC_CPU(object_new(cpu_model));
+ cpu = OPENRISC_CPU(object_new(object_class_get_name(oc)));
cpu->env.cpu_model_str = cpu_model;
openrisc_cpu_realize(OBJECT(cpu), NULL);
@@ -170,11 +191,6 @@ OpenRISCCPU *cpu_openrisc_init(const char *cpu_model)
return cpu;
}
-typedef struct OpenRISCCPUList {
- fprintf_function cpu_fprintf;
- FILE *file;
-} OpenRISCCPUList;
-
/* Sort alphabetically by type name, except for "any". */
static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b)
{
@@ -196,7 +212,7 @@ static gint openrisc_cpu_list_compare(gconstpointer a, gconstpointer b)
static void openrisc_cpu_list_entry(gpointer data, gpointer user_data)
{
ObjectClass *oc = data;
- OpenRISCCPUList *s = user_data;
+ CPUListState *s = user_data;
(*s->cpu_fprintf)(s->file, " %s\n",
object_class_get_name(oc));
@@ -204,7 +220,7 @@ static void openrisc_cpu_list_entry(gpointer data, gpointer user_data)
void cpu_openrisc_list(FILE *f, fprintf_function cpu_fprintf)
{
- OpenRISCCPUList s = {
+ CPUListState s = {
.file = f,
.cpu_fprintf = cpu_fprintf,
};
diff --git a/target-openrisc/exception_helper.c b/target-openrisc/exception_helper.c
index dab4148151..0c53b7755b 100644
--- a/target-openrisc/exception_helper.c
+++ b/target-openrisc/exception_helper.c
@@ -23,7 +23,7 @@
void HELPER(exception)(CPUOpenRISCState *env, uint32_t excp)
{
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
raise_exception(cpu, excp);
}
diff --git a/target-openrisc/fpu_helper.c b/target-openrisc/fpu_helper.c
index b184d5ef73..4615a366d1 100644
--- a/target-openrisc/fpu_helper.c
+++ b/target-openrisc/fpu_helper.c
@@ -68,7 +68,7 @@ static inline void update_fpcsr(OpenRISCCPU *cpu)
uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val)
{
uint64_t itofd;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
set_float_exception_flags(0, &cpu->env.fp_status);
itofd = int32_to_float64(val, &cpu->env.fp_status);
@@ -80,7 +80,7 @@ uint64_t HELPER(itofd)(CPUOpenRISCState *env, uint64_t val)
uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val)
{
uint32_t itofs;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
set_float_exception_flags(0, &cpu->env.fp_status);
itofs = int32_to_float32(val, &cpu->env.fp_status);
@@ -92,7 +92,7 @@ uint32_t HELPER(itofs)(CPUOpenRISCState *env, uint32_t val)
uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val)
{
uint64_t ftoid;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
set_float_exception_flags(0, &cpu->env.fp_status);
ftoid = float32_to_int64(val, &cpu->env.fp_status);
@@ -104,7 +104,7 @@ uint64_t HELPER(ftoid)(CPUOpenRISCState *env, uint64_t val)
uint32_t HELPER(ftois)(CPUOpenRISCState *env, uint32_t val)
{
uint32_t ftois;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
set_float_exception_flags(0, &cpu->env.fp_status);
ftois = float32_to_int32(val, &cpu->env.fp_status);
@@ -120,7 +120,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \
uint64_t fdt0, uint64_t fdt1) \
{ \
uint64_t result; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
result = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -131,7 +131,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \
uint32_t fdt0, uint32_t fdt1) \
{ \
uint32_t result; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
result = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -152,7 +152,7 @@ uint64_t helper_float_ ## name1 ## name2 ## _d(CPUOpenRISCState *env, \
{ \
uint64_t result, temp, hi, lo; \
uint32_t val1, val2; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
hi = env->fpmaddhi; \
lo = env->fpmaddlo; \
set_float_exception_flags(0, &cpu->env.fp_status); \
@@ -174,7 +174,7 @@ uint32_t helper_float_ ## name1 ## name2 ## _s(CPUOpenRISCState *env, \
{ \
uint64_t result, temp, hi, lo; \
uint32_t val1, val2; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
hi = cpu->env.fpmaddhi; \
lo = cpu->env.fpmaddlo; \
set_float_exception_flags(0, &cpu->env.fp_status); \
@@ -198,7 +198,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \
uint64_t fdt0, uint64_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = float64_ ## name(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -209,7 +209,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \
uint32_t fdt0, uint32_t fdt1)\
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = float32_ ## name(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -227,7 +227,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \
uint64_t fdt0, uint64_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float64_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -238,7 +238,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \
uint32_t fdt0, uint32_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float32_eq_quiet(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -253,7 +253,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \
uint64_t fdt0, uint64_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float64_le(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -264,7 +264,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \
uint32_t fdt0, uint32_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float32_le(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -278,7 +278,7 @@ uint64_t helper_float_ ## name ## _d(CPUOpenRISCState *env, \
uint64_t fdt0, uint64_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float64_lt(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
@@ -289,7 +289,7 @@ uint32_t helper_float_ ## name ## _s(CPUOpenRISCState *env, \
uint32_t fdt0, uint32_t fdt1) \
{ \
int res; \
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env)); \
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env); \
set_float_exception_flags(0, &cpu->env.fp_status); \
res = !float32_lt(fdt0, fdt1, &cpu->env.fp_status); \
update_fpcsr(cpu); \
diff --git a/target-openrisc/int_helper.c b/target-openrisc/int_helper.c
index 20f9837e6a..16cb5abfcd 100644
--- a/target-openrisc/int_helper.c
+++ b/target-openrisc/int_helper.c
@@ -48,7 +48,7 @@ uint32_t HELPER(mul32)(CPUOpenRISCState *env,
uint64_t result;
uint32_t high, cy;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
result = (uint64_t)ra * rb;
/* regisiers in or32 is 32bit, so 32 is NOT a magic number.
diff --git a/target-openrisc/interrupt_helper.c b/target-openrisc/interrupt_helper.c
index 79f5afed44..a176441b01 100644
--- a/target-openrisc/interrupt_helper.c
+++ b/target-openrisc/interrupt_helper.c
@@ -23,7 +23,7 @@
void HELPER(rfe)(CPUOpenRISCState *env)
{
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
#ifndef CONFIG_USER_ONLY
int need_flush_tlb = (cpu->env.sr & (SR_SM | SR_IME | SR_DME)) ^
(cpu->env.esr & (SR_SM | SR_IME | SR_DME));
diff --git a/target-openrisc/mmu.c b/target-openrisc/mmu.c
index 836465259a..d354e1f8b2 100644
--- a/target-openrisc/mmu.c
+++ b/target-openrisc/mmu.c
@@ -187,7 +187,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env,
int ret = 0;
hwaddr physical = 0;
int prot = 0;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
ret = cpu_openrisc_get_phys_addr(cpu, &physical, &prot,
address, rw);
@@ -209,7 +209,7 @@ int cpu_openrisc_handle_mmu_fault(CPUOpenRISCState *env,
target_ulong address, int rw, int mmu_idx)
{
int ret = 0;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
cpu_openrisc_raise_mmu_exception(cpu, address, rw, ret);
ret = 1;
@@ -224,7 +224,7 @@ hwaddr cpu_get_phys_page_debug(CPUOpenRISCState *env,
{
hwaddr phys_addr;
int prot;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
if (cpu_openrisc_get_phys_addr(cpu, &phys_addr, &prot, addr, 0)) {
return -1;
diff --git a/target-openrisc/sys_helper.c b/target-openrisc/sys_helper.c
index f160dc397c..3c5f45ab75 100644
--- a/target-openrisc/sys_helper.c
+++ b/target-openrisc/sys_helper.c
@@ -30,7 +30,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
int spr = (ra | offset);
int idx;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
switch (spr) {
case TO_SPR(0, 0): /* VR */
@@ -177,7 +177,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
int spr = (ra | offset);
int idx;
- OpenRISCCPU *cpu = OPENRISC_CPU(ENV_GET_CPU(env));
+ OpenRISCCPU *cpu = openrisc_env_get_cpu(env);
switch (spr) {
case TO_SPR(0, 0): /* VR */
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 2f4f06818a..2c64c634f1 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -384,6 +384,11 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
#endif /* !defined (TARGET_PPC64) */
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+ return cpu->cpu_index;
+}
+
int kvm_arch_init_vcpu(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 4f767c9751..e143af532a 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10578,6 +10578,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
pcc->parent_reset = cc->reset;
cc->reset = ppc_cpu_reset;
+
+ cc->class_by_name = ppc_cpu_class_by_name;
}
static const TypeInfo ppc_cpu_type_info = {
diff --git a/target-s390x/Makefile.objs b/target-s390x/Makefile.objs
index e728abf759..3afb0b71f9 100644
--- a/target-s390x/Makefile.objs
+++ b/target-s390x/Makefile.objs
@@ -1,4 +1,4 @@
obj-y += translate.o helper.o cpu.o interrupt.o
obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o
-obj-$(CONFIG_SOFTMMU) += machine.o
+obj-$(CONFIG_SOFTMMU) += machine.o ioinst.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 1f2d94218a..9be4a475a3 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -50,6 +50,11 @@
#define MMU_USER_IDX 1
#define MAX_EXT_QUEUE 16
+#define MAX_IO_QUEUE 16
+#define MAX_MCHK_QUEUE 16
+
+#define PSW_MCHK_MASK 0x0004000000000000
+#define PSW_IO_MASK 0x0200000000000000
typedef struct PSW {
uint64_t mask;
@@ -62,6 +67,17 @@ typedef struct ExtQueue {
uint32_t param64;
} ExtQueue;
+typedef struct IOIntQueue {
+ uint16_t id;
+ uint16_t nr;
+ uint32_t parm;
+ uint32_t word;
+} IOIntQueue;
+
+typedef struct MchkQueue {
+ uint16_t type;
+} MchkQueue;
+
typedef struct CPUS390XState {
uint64_t regs[16]; /* GP registers */
CPU_DoubleU fregs[16]; /* FP registers */
@@ -93,9 +109,17 @@ typedef struct CPUS390XState {
uint64_t cregs[16]; /* control registers */
ExtQueue ext_queue[MAX_EXT_QUEUE];
- int pending_int;
+ IOIntQueue io_queue[MAX_IO_QUEUE][8];
+ MchkQueue mchk_queue[MAX_MCHK_QUEUE];
+ int pending_int;
int ext_index;
+ int io_index[8];
+ int mchk_index;
+
+ uint64_t ckc;
+ uint64_t cputm;
+ uint32_t todpr;
CPU_COMMON
@@ -123,6 +147,9 @@ static inline void cpu_clone_regs(CPUS390XState *env, target_ulong newsp)
}
#endif
+/* distinguish between 24 bit and 31 bit addressing */
+#define HIGH_ORDER_BIT 0x80000000
+
/* Interrupt Codes */
/* Program Interrupts */
#define PGM_OPERATION 0x0001
@@ -300,8 +327,27 @@ int cpu_s390x_handle_mmu_fault (CPUS390XState *env, target_ulong address, int rw
int mmu_idx);
#define cpu_handle_mmu_fault cpu_s390x_handle_mmu_fault
+#include "ioinst.h"
#ifndef CONFIG_USER_ONLY
+void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
+ int is_write);
+void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len,
+ int is_write);
+static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb)
+{
+ hwaddr addr = 0;
+ uint8_t reg;
+
+ reg = ipb >> 28;
+ if (reg > 0) {
+ addr = env->regs[reg];
+ }
+ addr += (ipb >> 16) & 0xfff;
+
+ return addr;
+}
+
void s390x_tod_timer(void *opaque);
void s390x_cpu_timer(void *opaque);
@@ -351,6 +397,114 @@ static inline unsigned s390_del_running_cpu(CPUS390XState *env)
void cpu_lock(void);
void cpu_unlock(void);
+typedef struct SubchDev SubchDev;
+
+#ifndef CONFIG_USER_ONLY
+SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
+ uint16_t schid);
+bool css_subch_visible(SubchDev *sch);
+void css_conditional_io_interrupt(SubchDev *sch);
+int css_do_stsch(SubchDev *sch, SCHIB *schib);
+bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid);
+int css_do_msch(SubchDev *sch, SCHIB *schib);
+int css_do_xsch(SubchDev *sch);
+int css_do_csch(SubchDev *sch);
+int css_do_hsch(SubchDev *sch);
+int css_do_ssch(SubchDev *sch, ORB *orb);
+int css_do_tsch(SubchDev *sch, IRB *irb);
+int css_do_stcrw(CRW *crw);
+int css_do_tpi(IOIntCode *int_code, int lowcore);
+int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
+ int rfmt, void *buf);
+void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo);
+int css_enable_mcsse(void);
+int css_enable_mss(void);
+int css_do_rsch(SubchDev *sch);
+int css_do_rchp(uint8_t cssid, uint8_t chpid);
+bool css_present(uint8_t cssid);
+#else
+static inline SubchDev *css_find_subch(uint8_t m, uint8_t cssid, uint8_t ssid,
+ uint16_t schid)
+{
+ return NULL;
+}
+static inline bool css_subch_visible(SubchDev *sch)
+{
+ return false;
+}
+static inline void css_conditional_io_interrupt(SubchDev *sch)
+{
+}
+static inline int css_do_stsch(SubchDev *sch, SCHIB *schib)
+{
+ return -ENODEV;
+}
+static inline bool css_schid_final(uint8_t cssid, uint8_t ssid, uint16_t schid)
+{
+ return true;
+}
+static inline int css_do_msch(SubchDev *sch, SCHIB *schib)
+{
+ return -ENODEV;
+}
+static inline int css_do_xsch(SubchDev *sch)
+{
+ return -ENODEV;
+}
+static inline int css_do_csch(SubchDev *sch)
+{
+ return -ENODEV;
+}
+static inline int css_do_hsch(SubchDev *sch)
+{
+ return -ENODEV;
+}
+static inline int css_do_ssch(SubchDev *sch, ORB *orb)
+{
+ return -ENODEV;
+}
+static inline int css_do_tsch(SubchDev *sch, IRB *irb)
+{
+ return -ENODEV;
+}
+static inline int css_do_stcrw(CRW *crw)
+{
+ return 1;
+}
+static inline int css_do_tpi(IOIntCode *int_code, int lowcore)
+{
+ return 0;
+}
+static inline int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid,
+ int rfmt, uint8_t l_chpid, void *buf)
+{
+ return 0;
+}
+static inline void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
+{
+}
+static inline int css_enable_mss(void)
+{
+ return -EINVAL;
+}
+static inline int css_enable_mcsse(void)
+{
+ return -EINVAL;
+}
+static inline int css_do_rsch(SubchDev *sch)
+{
+ return -ENODEV;
+}
+static inline int css_do_rchp(uint8_t cssid, uint8_t chpid)
+{
+ return -ENODEV;
+}
+static inline bool css_present(uint8_t cssid)
+{
+ return false;
+}
+#endif
+
static inline void cpu_set_tls(CPUS390XState *env, target_ulong newtls)
{
env->aregs[0] = newtls >> 32;
@@ -370,10 +524,14 @@ void s390_cpu_list(FILE *f, fprintf_function cpu_fprintf);
#define EXCP_EXT 1 /* external interrupt */
#define EXCP_SVC 2 /* supervisor call (syscall) */
#define EXCP_PGM 3 /* program interruption */
+#define EXCP_IO 7 /* I/O interrupt */
+#define EXCP_MCHK 8 /* machine check */
#define INTERRUPT_EXT (1 << 0)
#define INTERRUPT_TOD (1 << 1)
#define INTERRUPT_CPUTIMER (1 << 2)
+#define INTERRUPT_IO (1 << 3)
+#define INTERRUPT_MCHK (1 << 4)
/* Program Status Word. */
#define S390_PSWM_REGNUM 0
@@ -836,6 +994,45 @@ static inline void cpu_inject_ext(CPUS390XState *env, uint32_t code, uint32_t pa
cpu_interrupt(env, CPU_INTERRUPT_HARD);
}
+static inline void cpu_inject_io(CPUS390XState *env, uint16_t subchannel_id,
+ uint16_t subchannel_number,
+ uint32_t io_int_parm, uint32_t io_int_word)
+{
+ int isc = ffs(io_int_word << 2) - 1;
+
+ if (env->io_index[isc] == MAX_IO_QUEUE - 1) {
+ /* ugh - can't queue anymore. Let's drop. */
+ return;
+ }
+
+ env->io_index[isc]++;
+ assert(env->io_index[isc] < MAX_IO_QUEUE);
+
+ env->io_queue[env->io_index[isc]][isc].id = subchannel_id;
+ env->io_queue[env->io_index[isc]][isc].nr = subchannel_number;
+ env->io_queue[env->io_index[isc]][isc].parm = io_int_parm;
+ env->io_queue[env->io_index[isc]][isc].word = io_int_word;
+
+ env->pending_int |= INTERRUPT_IO;
+ cpu_interrupt(env, CPU_INTERRUPT_HARD);
+}
+
+static inline void cpu_inject_crw_mchk(CPUS390XState *env)
+{
+ if (env->mchk_index == MAX_MCHK_QUEUE - 1) {
+ /* ugh - can't queue anymore. Let's drop. */
+ return;
+ }
+
+ env->mchk_index++;
+ assert(env->mchk_index < MAX_MCHK_QUEUE);
+
+ env->mchk_queue[env->mchk_index].type = 1;
+
+ env->pending_int |= INTERRUPT_MCHK;
+ cpu_interrupt(env, CPU_INTERRUPT_HARD);
+}
+
static inline bool cpu_has_work(CPUState *cpu)
{
CPUS390XState *env = &S390_CPU(cpu)->env;
@@ -859,4 +1056,52 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilen);
void QEMU_NORETURN runtime_exception(CPUS390XState *env, int excp,
uintptr_t retaddr);
+#include <sysemu/kvm.h>
+
+#ifdef CONFIG_KVM
+void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id,
+ uint16_t subchannel_nr, uint32_t io_int_parm,
+ uint32_t io_int_word);
+void kvm_s390_crw_mchk(S390CPU *cpu);
+void kvm_s390_enable_css_support(S390CPU *cpu);
+#else
+static inline void kvm_s390_io_interrupt(S390CPU *cpu,
+ uint16_t subchannel_id,
+ uint16_t subchannel_nr,
+ uint32_t io_int_parm,
+ uint32_t io_int_word)
+{
+}
+static inline void kvm_s390_crw_mchk(S390CPU *cpu)
+{
+}
+static inline void kvm_s390_enable_css_support(S390CPU *cpu)
+{
+}
+#endif
+
+static inline void s390_io_interrupt(S390CPU *cpu,
+ uint16_t subchannel_id,
+ uint16_t subchannel_nr,
+ uint32_t io_int_parm,
+ uint32_t io_int_word)
+{
+ if (kvm_enabled()) {
+ kvm_s390_io_interrupt(cpu, subchannel_id, subchannel_nr, io_int_parm,
+ io_int_word);
+ } else {
+ cpu_inject_io(&cpu->env, subchannel_id, subchannel_nr, io_int_parm,
+ io_int_word);
+ }
+}
+
+static inline void s390_crw_mchk(S390CPU *cpu)
+{
+ if (kvm_enabled()) {
+ kvm_s390_crw_mchk(cpu);
+ } else {
+ cpu_inject_crw_mchk(&cpu->env);
+ }
+}
+
#endif
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index 9a132e6d2c..857c89725c 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -471,13 +471,56 @@ static uint64_t get_psw_mask(CPUS390XState *env)
return r;
}
+static LowCore *cpu_map_lowcore(CPUS390XState *env)
+{
+ LowCore *lowcore;
+ hwaddr len = sizeof(LowCore);
+
+ lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+
+ if (len < sizeof(LowCore)) {
+ cpu_abort(env, "Could not map lowcore\n");
+ }
+
+ return lowcore;
+}
+
+static void cpu_unmap_lowcore(LowCore *lowcore)
+{
+ cpu_physical_memory_unmap(lowcore, sizeof(LowCore), 1, sizeof(LowCore));
+}
+
+void *s390_cpu_physical_memory_map(CPUS390XState *env, hwaddr addr, hwaddr *len,
+ int is_write)
+{
+ hwaddr start = addr;
+
+ /* Mind the prefix area. */
+ if (addr < 8192) {
+ /* Map the lowcore. */
+ start += env->psa;
+ *len = MIN(*len, 8192 - addr);
+ } else if ((addr >= env->psa) && (addr < env->psa + 8192)) {
+ /* Map the 0 page. */
+ start -= env->psa;
+ *len = MIN(*len, 8192 - start);
+ }
+
+ return cpu_physical_memory_map(start, len, is_write);
+}
+
+void s390_cpu_physical_memory_unmap(CPUS390XState *env, void *addr, hwaddr len,
+ int is_write)
+{
+ cpu_physical_memory_unmap(addr, len, is_write, len);
+}
+
static void do_svc_interrupt(CPUS390XState *env)
{
uint64_t mask, addr;
LowCore *lowcore;
- hwaddr len = TARGET_PAGE_SIZE;
- lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+ lowcore = cpu_map_lowcore(env);
lowcore->svc_code = cpu_to_be16(env->int_svc_code);
lowcore->svc_ilen = cpu_to_be16(env->int_svc_ilen);
@@ -486,7 +529,7 @@ static void do_svc_interrupt(CPUS390XState *env)
mask = be64_to_cpu(lowcore->svc_new_psw.mask);
addr = be64_to_cpu(lowcore->svc_new_psw.addr);
- cpu_physical_memory_unmap(lowcore, len, 1, len);
+ cpu_unmap_lowcore(lowcore);
load_psw(env, mask, addr);
}
@@ -495,7 +538,6 @@ static void do_program_interrupt(CPUS390XState *env)
{
uint64_t mask, addr;
LowCore *lowcore;
- hwaddr len = TARGET_PAGE_SIZE;
int ilen = env->int_pgm_ilen;
switch (ilen) {
@@ -513,7 +555,7 @@ static void do_program_interrupt(CPUS390XState *env)
qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilen=%d\n",
__func__, env->int_pgm_code, ilen);
- lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+ lowcore = cpu_map_lowcore(env);
lowcore->pgm_ilen = cpu_to_be16(ilen);
lowcore->pgm_code = cpu_to_be16(env->int_pgm_code);
@@ -522,7 +564,7 @@ static void do_program_interrupt(CPUS390XState *env)
mask = be64_to_cpu(lowcore->program_new_psw.mask);
addr = be64_to_cpu(lowcore->program_new_psw.addr);
- cpu_physical_memory_unmap(lowcore, len, 1, len);
+ cpu_unmap_lowcore(lowcore);
DPRINTF("%s: %x %x %" PRIx64 " %" PRIx64 "\n", __func__,
env->int_pgm_code, ilen, env->psw.mask,
@@ -537,7 +579,6 @@ static void do_ext_interrupt(CPUS390XState *env)
{
uint64_t mask, addr;
LowCore *lowcore;
- hwaddr len = TARGET_PAGE_SIZE;
ExtQueue *q;
if (!(env->psw.mask & PSW_MASK_EXT)) {
@@ -549,7 +590,7 @@ static void do_ext_interrupt(CPUS390XState *env)
}
q = &env->ext_queue[env->ext_index];
- lowcore = cpu_physical_memory_map(env->psa, &len, 1);
+ lowcore = cpu_map_lowcore(env);
lowcore->ext_int_code = cpu_to_be16(q->code);
lowcore->ext_params = cpu_to_be32(q->param);
@@ -560,7 +601,7 @@ static void do_ext_interrupt(CPUS390XState *env)
mask = be64_to_cpu(lowcore->external_new_psw.mask);
addr = be64_to_cpu(lowcore->external_new_psw.addr);
- cpu_physical_memory_unmap(lowcore, len, 1, len);
+ cpu_unmap_lowcore(lowcore);
env->ext_index--;
if (env->ext_index == -1) {
@@ -573,12 +614,140 @@ static void do_ext_interrupt(CPUS390XState *env)
load_psw(env, mask, addr);
}
+static void do_io_interrupt(CPUS390XState *env)
+{
+ uint64_t mask, addr;
+ LowCore *lowcore;
+ IOIntQueue *q;
+ uint8_t isc;
+ int disable = 1;
+ int found = 0;
+
+ if (!(env->psw.mask & PSW_MASK_IO)) {
+ cpu_abort(env, "I/O int w/o I/O mask\n");
+ }
+
+ for (isc = 0; isc < ARRAY_SIZE(env->io_index); isc++) {
+ if (env->io_index[isc] < 0) {
+ continue;
+ }
+ if (env->io_index[isc] > MAX_IO_QUEUE) {
+ cpu_abort(env, "I/O queue overrun for isc %d: %d\n",
+ isc, env->io_index[isc]);
+ }
+
+ q = &env->io_queue[env->io_index[isc]][isc];
+ if (!(env->cregs[6] & q->word)) {
+ disable = 0;
+ continue;
+ }
+ found = 1;
+ lowcore = cpu_map_lowcore(env);
+
+ lowcore->subchannel_id = cpu_to_be16(q->id);
+ lowcore->subchannel_nr = cpu_to_be16(q->nr);
+ lowcore->io_int_parm = cpu_to_be32(q->parm);
+ lowcore->io_int_word = cpu_to_be32(q->word);
+ lowcore->io_old_psw.mask = cpu_to_be64(get_psw_mask(env));
+ lowcore->io_old_psw.addr = cpu_to_be64(env->psw.addr);
+ mask = be64_to_cpu(lowcore->io_new_psw.mask);
+ addr = be64_to_cpu(lowcore->io_new_psw.addr);
+
+ cpu_unmap_lowcore(lowcore);
+
+ env->io_index[isc]--;
+ if (env->io_index >= 0) {
+ disable = 0;
+ }
+ break;
+ }
+
+ if (disable) {
+ env->pending_int &= ~INTERRUPT_IO;
+ }
+
+ if (found) {
+ DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
+ env->psw.mask, env->psw.addr);
+ load_psw(env, mask, addr);
+ }
+}
+
+static void do_mchk_interrupt(CPUS390XState *env)
+{
+ uint64_t mask, addr;
+ LowCore *lowcore;
+ MchkQueue *q;
+ int i;
+
+ if (!(env->psw.mask & PSW_MASK_MCHECK)) {
+ cpu_abort(env, "Machine check w/o mchk mask\n");
+ }
+
+ if (env->mchk_index < 0 || env->mchk_index > MAX_MCHK_QUEUE) {
+ cpu_abort(env, "Mchk queue overrun: %d\n", env->mchk_index);
+ }
+
+ q = &env->mchk_queue[env->mchk_index];
+
+ if (q->type != 1) {
+ /* Don't know how to handle this... */
+ cpu_abort(env, "Unknown machine check type %d\n", q->type);
+ }
+ if (!(env->cregs[14] & (1 << 28))) {
+ /* CRW machine checks disabled */
+ return;
+ }
+
+ lowcore = cpu_map_lowcore(env);
+
+ for (i = 0; i < 16; i++) {
+ lowcore->floating_pt_save_area[i] = cpu_to_be64(env->fregs[i].ll);
+ lowcore->gpregs_save_area[i] = cpu_to_be64(env->regs[i]);
+ lowcore->access_regs_save_area[i] = cpu_to_be32(env->aregs[i]);
+ lowcore->cregs_save_area[i] = cpu_to_be64(env->cregs[i]);
+ }
+ lowcore->prefixreg_save_area = cpu_to_be32(env->psa);
+ lowcore->fpt_creg_save_area = cpu_to_be32(env->fpc);
+ lowcore->tod_progreg_save_area = cpu_to_be32(env->todpr);
+ lowcore->cpu_timer_save_area[0] = cpu_to_be32(env->cputm >> 32);
+ lowcore->cpu_timer_save_area[1] = cpu_to_be32((uint32_t)env->cputm);
+ lowcore->clock_comp_save_area[0] = cpu_to_be32(env->ckc >> 32);
+ lowcore->clock_comp_save_area[1] = cpu_to_be32((uint32_t)env->ckc);
+
+ lowcore->mcck_interruption_code[0] = cpu_to_be32(0x00400f1d);
+ lowcore->mcck_interruption_code[1] = cpu_to_be32(0x40330000);
+ lowcore->mcck_old_psw.mask = cpu_to_be64(get_psw_mask(env));
+ lowcore->mcck_old_psw.addr = cpu_to_be64(env->psw.addr);
+ mask = be64_to_cpu(lowcore->mcck_new_psw.mask);
+ addr = be64_to_cpu(lowcore->mcck_new_psw.addr);
+
+ cpu_unmap_lowcore(lowcore);
+
+ env->mchk_index--;
+ if (env->mchk_index == -1) {
+ env->pending_int &= ~INTERRUPT_MCHK;
+ }
+
+ DPRINTF("%s: %" PRIx64 " %" PRIx64 "\n", __func__,
+ env->psw.mask, env->psw.addr);
+
+ load_psw(env, mask, addr);
+}
+
void do_interrupt(CPUS390XState *env)
{
qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
__func__, env->exception_index, env->psw.addr);
s390_add_running_cpu(env);
+ /* handle machine checks */
+ if ((env->psw.mask & PSW_MASK_MCHECK) &&
+ (env->exception_index == -1)) {
+ if (env->pending_int & INTERRUPT_MCHK) {
+ env->exception_index = EXCP_MCHK;
+ }
+ }
/* handle external interrupts */
if ((env->psw.mask & PSW_MASK_EXT) &&
env->exception_index == -1) {
@@ -597,6 +766,13 @@ void do_interrupt(CPUS390XState *env)
env->pending_int &= ~INTERRUPT_TOD;
}
}
+ /* handle I/O interrupts */
+ if ((env->psw.mask & PSW_MASK_IO) &&
+ (env->exception_index == -1)) {
+ if (env->pending_int & INTERRUPT_IO) {
+ env->exception_index = EXCP_IO;
+ }
+ }
switch (env->exception_index) {
case EXCP_PGM:
@@ -608,6 +784,12 @@ void do_interrupt(CPUS390XState *env)
case EXCP_EXT:
do_ext_interrupt(env);
break;
+ case EXCP_IO:
+ do_io_interrupt(env);
+ break;
+ case EXCP_MCHK:
+ do_mchk_interrupt(env);
+ break;
}
env->exception_index = -1;
diff --git a/target-s390x/ioinst.c b/target-s390x/ioinst.c
new file mode 100644
index 0000000000..e3531f365e
--- /dev/null
+++ b/target-s390x/ioinst.c
@@ -0,0 +1,761 @@
+/*
+ * I/O instructions for S/390
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include <sys/types.h>
+
+#include "cpu.h"
+#include "ioinst.h"
+#include "trace.h"
+
+int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
+ int *schid)
+{
+ if (!IOINST_SCHID_ONE(value)) {
+ return -EINVAL;
+ }
+ if (!IOINST_SCHID_M(value)) {
+ if (IOINST_SCHID_CSSID(value)) {
+ return -EINVAL;
+ }
+ *cssid = 0;
+ *m = 0;
+ } else {
+ *cssid = IOINST_SCHID_CSSID(value);
+ *m = 1;
+ }
+ *ssid = IOINST_SCHID_SSID(value);
+ *schid = IOINST_SCHID_NR(value);
+ return 0;
+}
+
+int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ int ret = -ENODEV;
+ int cc;
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("xsch", cssid, ssid, schid);
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_xsch(sch);
+ }
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EBUSY:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ cc = 1;
+ break;
+ }
+
+ return cc;
+}
+
+int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ int ret = -ENODEV;
+ int cc;
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("csch", cssid, ssid, schid);
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_csch(sch);
+ }
+ if (ret == -ENODEV) {
+ cc = 3;
+ } else {
+ cc = 0;
+ }
+ return cc;
+}
+
+int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ int ret = -ENODEV;
+ int cc;
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("hsch", cssid, ssid, schid);
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_hsch(sch);
+ }
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EBUSY:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ cc = 1;
+ break;
+ }
+
+ return cc;
+}
+
+static int ioinst_schib_valid(SCHIB *schib)
+{
+ if ((schib->pmcw.flags & PMCW_FLAGS_MASK_INVALID) ||
+ (schib->pmcw.chars & PMCW_CHARS_MASK_INVALID)) {
+ return 0;
+ }
+ /* Disallow extended measurements for now. */
+ if (schib->pmcw.chars & PMCW_CHARS_MASK_XMWME) {
+ return 0;
+ }
+ return 1;
+}
+
+int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ SCHIB *schib;
+ uint64_t addr;
+ int ret = -ENODEV;
+ int cc;
+ hwaddr len = sizeof(*schib);
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("msch", cssid, ssid, schid);
+ addr = decode_basedisp_s(env, ipb);
+ schib = s390_cpu_physical_memory_map(env, addr, &len, 0);
+ if (!schib || len != sizeof(*schib)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ cc = -EIO;
+ goto out;
+ }
+ if (!ioinst_schib_valid(schib)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ cc = -EIO;
+ goto out;
+ }
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_msch(sch, schib);
+ }
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EBUSY:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ cc = 1;
+ break;
+ }
+out:
+ s390_cpu_physical_memory_unmap(env, schib, len, 0);
+ return cc;
+}
+
+static void copy_orb_from_guest(ORB *dest, const ORB *src)
+{
+ dest->intparm = be32_to_cpu(src->intparm);
+ dest->ctrl0 = be16_to_cpu(src->ctrl0);
+ dest->lpm = src->lpm;
+ dest->ctrl1 = src->ctrl1;
+ dest->cpa = be32_to_cpu(src->cpa);
+}
+
+static int ioinst_orb_valid(ORB *orb)
+{
+ if ((orb->ctrl0 & ORB_CTRL0_MASK_INVALID) ||
+ (orb->ctrl1 & ORB_CTRL1_MASK_INVALID)) {
+ return 0;
+ }
+ if ((orb->cpa & HIGH_ORDER_BIT) != 0) {
+ return 0;
+ }
+ return 1;
+}
+
+int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ ORB *orig_orb, orb;
+ uint64_t addr;
+ int ret = -ENODEV;
+ int cc;
+ hwaddr len = sizeof(*orig_orb);
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("ssch", cssid, ssid, schid);
+ addr = decode_basedisp_s(env, ipb);
+ orig_orb = s390_cpu_physical_memory_map(env, addr, &len, 0);
+ if (!orig_orb || len != sizeof(*orig_orb)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ cc = -EIO;
+ goto out;
+ }
+ copy_orb_from_guest(&orb, orig_orb);
+ if (!ioinst_orb_valid(&orb)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ cc = -EIO;
+ goto out;
+ }
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_ssch(sch, &orb);
+ }
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EBUSY:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ cc = 1;
+ break;
+ }
+
+out:
+ s390_cpu_physical_memory_unmap(env, orig_orb, len, 0);
+ return cc;
+}
+
+int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb)
+{
+ CRW *crw;
+ uint64_t addr;
+ int cc;
+ hwaddr len = sizeof(*crw);
+
+ addr = decode_basedisp_s(env, ipb);
+ crw = s390_cpu_physical_memory_map(env, addr, &len, 1);
+ if (!crw || len != sizeof(*crw)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ cc = -EIO;
+ goto out;
+ }
+ cc = css_do_stcrw(crw);
+ /* 0 - crw stored, 1 - zeroes stored */
+out:
+ s390_cpu_physical_memory_unmap(env, crw, len, 1);
+ return cc;
+}
+
+int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ uint64_t addr;
+ int cc;
+ SCHIB *schib;
+ hwaddr len = sizeof(*schib);
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("stsch", cssid, ssid, schid);
+ addr = decode_basedisp_s(env, ipb);
+ schib = s390_cpu_physical_memory_map(env, addr, &len, 1);
+ if (!schib || len != sizeof(*schib)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ cc = -EIO;
+ goto out;
+ }
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch) {
+ if (css_subch_visible(sch)) {
+ css_do_stsch(sch, schib);
+ cc = 0;
+ } else {
+ /* Indicate no more subchannels in this css/ss */
+ cc = 3;
+ }
+ } else {
+ if (css_schid_final(cssid, ssid, schid)) {
+ cc = 3; /* No more subchannels in this css/ss */
+ } else {
+ /* Store an empty schib. */
+ memset(schib, 0, sizeof(*schib));
+ cc = 0;
+ }
+ }
+out:
+ s390_cpu_physical_memory_unmap(env, schib, len, 1);
+ return cc;
+}
+
+int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ IRB *irb;
+ uint64_t addr;
+ int ret = -ENODEV;
+ int cc;
+ hwaddr len = sizeof(*irb);
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("tsch", cssid, ssid, schid);
+ addr = decode_basedisp_s(env, ipb);
+ irb = s390_cpu_physical_memory_map(env, addr, &len, 1);
+ if (!irb || len != sizeof(*irb)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ cc = -EIO;
+ goto out;
+ }
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_tsch(sch, irb);
+ /* 0 - status pending, 1 - not status pending */
+ cc = ret;
+ } else {
+ cc = 3;
+ }
+out:
+ s390_cpu_physical_memory_unmap(env, irb, sizeof(*irb), 1);
+ return cc;
+}
+
+typedef struct ChscReq {
+ uint16_t len;
+ uint16_t command;
+ uint32_t param0;
+ uint32_t param1;
+ uint32_t param2;
+} QEMU_PACKED ChscReq;
+
+typedef struct ChscResp {
+ uint16_t len;
+ uint16_t code;
+ uint32_t param;
+ char data[0];
+} QEMU_PACKED ChscResp;
+
+#define CHSC_MIN_RESP_LEN 0x0008
+
+#define CHSC_SCPD 0x0002
+#define CHSC_SCSC 0x0010
+#define CHSC_SDA 0x0031
+
+#define CHSC_SCPD_0_M 0x20000000
+#define CHSC_SCPD_0_C 0x10000000
+#define CHSC_SCPD_0_FMT 0x0f000000
+#define CHSC_SCPD_0_CSSID 0x00ff0000
+#define CHSC_SCPD_0_RFMT 0x00000f00
+#define CHSC_SCPD_0_RES 0xc000f000
+#define CHSC_SCPD_1_RES 0xffffff00
+#define CHSC_SCPD_01_CHPID 0x000000ff
+static void ioinst_handle_chsc_scpd(ChscReq *req, ChscResp *res)
+{
+ uint16_t len = be16_to_cpu(req->len);
+ uint32_t param0 = be32_to_cpu(req->param0);
+ uint32_t param1 = be32_to_cpu(req->param1);
+ uint16_t resp_code;
+ int rfmt;
+ uint16_t cssid;
+ uint8_t f_chpid, l_chpid;
+ int desc_size;
+ int m;
+
+ rfmt = (param0 & CHSC_SCPD_0_RFMT) >> 8;
+ if ((rfmt == 0) || (rfmt == 1)) {
+ rfmt = !!(param0 & CHSC_SCPD_0_C);
+ }
+ if ((len != 0x0010) || (param0 & CHSC_SCPD_0_RES) ||
+ (param1 & CHSC_SCPD_1_RES) || req->param2) {
+ resp_code = 0x0003;
+ goto out_err;
+ }
+ if (param0 & CHSC_SCPD_0_FMT) {
+ resp_code = 0x0007;
+ goto out_err;
+ }
+ cssid = (param0 & CHSC_SCPD_0_CSSID) >> 16;
+ m = param0 & CHSC_SCPD_0_M;
+ if (cssid != 0) {
+ if (!m || !css_present(cssid)) {
+ resp_code = 0x0008;
+ goto out_err;
+ }
+ }
+ f_chpid = param0 & CHSC_SCPD_01_CHPID;
+ l_chpid = param1 & CHSC_SCPD_01_CHPID;
+ if (l_chpid < f_chpid) {
+ resp_code = 0x0003;
+ goto out_err;
+ }
+ /* css_collect_chp_desc() is endian-aware */
+ desc_size = css_collect_chp_desc(m, cssid, f_chpid, l_chpid, rfmt,
+ &res->data);
+ res->code = cpu_to_be16(0x0001);
+ res->len = cpu_to_be16(8 + desc_size);
+ res->param = cpu_to_be32(rfmt);
+ return;
+
+ out_err:
+ res->code = cpu_to_be16(resp_code);
+ res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+ res->param = cpu_to_be32(rfmt);
+}
+
+#define CHSC_SCSC_0_M 0x20000000
+#define CHSC_SCSC_0_FMT 0x000f0000
+#define CHSC_SCSC_0_CSSID 0x0000ff00
+#define CHSC_SCSC_0_RES 0xdff000ff
+static void ioinst_handle_chsc_scsc(ChscReq *req, ChscResp *res)
+{
+ uint16_t len = be16_to_cpu(req->len);
+ uint32_t param0 = be32_to_cpu(req->param0);
+ uint8_t cssid;
+ uint16_t resp_code;
+ uint32_t general_chars[510];
+ uint32_t chsc_chars[508];
+
+ if (len != 0x0010) {
+ resp_code = 0x0003;
+ goto out_err;
+ }
+
+ if (param0 & CHSC_SCSC_0_FMT) {
+ resp_code = 0x0007;
+ goto out_err;
+ }
+ cssid = (param0 & CHSC_SCSC_0_CSSID) >> 8;
+ if (cssid != 0) {
+ if (!(param0 & CHSC_SCSC_0_M) || !css_present(cssid)) {
+ resp_code = 0x0008;
+ goto out_err;
+ }
+ }
+ if ((param0 & CHSC_SCSC_0_RES) || req->param1 || req->param2) {
+ resp_code = 0x0003;
+ goto out_err;
+ }
+ res->code = cpu_to_be16(0x0001);
+ res->len = cpu_to_be16(4080);
+ res->param = 0;
+
+ memset(general_chars, 0, sizeof(general_chars));
+ memset(chsc_chars, 0, sizeof(chsc_chars));
+
+ general_chars[0] = cpu_to_be32(0x03000000);
+ general_chars[1] = cpu_to_be32(0x00059000);
+
+ chsc_chars[0] = cpu_to_be32(0x40000000);
+ chsc_chars[3] = cpu_to_be32(0x00040000);
+
+ memcpy(res->data, general_chars, sizeof(general_chars));
+ memcpy(res->data + sizeof(general_chars), chsc_chars, sizeof(chsc_chars));
+ return;
+
+ out_err:
+ res->code = cpu_to_be16(resp_code);
+ res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+ res->param = 0;
+}
+
+#define CHSC_SDA_0_FMT 0x0f000000
+#define CHSC_SDA_0_OC 0x0000ffff
+#define CHSC_SDA_0_RES 0xf0ff0000
+#define CHSC_SDA_OC_MCSSE 0x0
+#define CHSC_SDA_OC_MSS 0x2
+static void ioinst_handle_chsc_sda(ChscReq *req, ChscResp *res)
+{
+ uint16_t resp_code = 0x0001;
+ uint16_t len = be16_to_cpu(req->len);
+ uint32_t param0 = be32_to_cpu(req->param0);
+ uint16_t oc;
+ int ret;
+
+ if ((len != 0x0400) || (param0 & CHSC_SDA_0_RES)) {
+ resp_code = 0x0003;
+ goto out;
+ }
+
+ if (param0 & CHSC_SDA_0_FMT) {
+ resp_code = 0x0007;
+ goto out;
+ }
+
+ oc = param0 & CHSC_SDA_0_OC;
+ switch (oc) {
+ case CHSC_SDA_OC_MCSSE:
+ ret = css_enable_mcsse();
+ if (ret == -EINVAL) {
+ resp_code = 0x0101;
+ goto out;
+ }
+ break;
+ case CHSC_SDA_OC_MSS:
+ ret = css_enable_mss();
+ if (ret == -EINVAL) {
+ resp_code = 0x0101;
+ goto out;
+ }
+ break;
+ default:
+ resp_code = 0x0003;
+ goto out;
+ }
+
+out:
+ res->code = cpu_to_be16(resp_code);
+ res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+ res->param = 0;
+}
+
+static void ioinst_handle_chsc_unimplemented(ChscResp *res)
+{
+ res->len = cpu_to_be16(CHSC_MIN_RESP_LEN);
+ res->code = cpu_to_be16(0x0004);
+ res->param = 0;
+}
+
+int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb)
+{
+ ChscReq *req;
+ ChscResp *res;
+ uint64_t addr;
+ int reg;
+ uint16_t len;
+ uint16_t command;
+ hwaddr map_size = TARGET_PAGE_SIZE;
+ int ret = 0;
+
+ trace_ioinst("chsc");
+ reg = (ipb >> 20) & 0x00f;
+ addr = env->regs[reg];
+ /* Page boundary? */
+ if (addr & 0xfff) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ return -EIO;
+ }
+ req = s390_cpu_physical_memory_map(env, addr, &map_size, 1);
+ if (!req || map_size != TARGET_PAGE_SIZE) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ ret = -EIO;
+ goto out;
+ }
+ len = be16_to_cpu(req->len);
+ /* Length field valid? */
+ if ((len < 16) || (len > 4088) || (len & 7)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ ret = -EIO;
+ goto out;
+ }
+ memset((char *)req + len, 0, TARGET_PAGE_SIZE - len);
+ res = (void *)((char *)req + len);
+ command = be16_to_cpu(req->command);
+ trace_ioinst_chsc_cmd(command, len);
+ switch (command) {
+ case CHSC_SCSC:
+ ioinst_handle_chsc_scsc(req, res);
+ break;
+ case CHSC_SCPD:
+ ioinst_handle_chsc_scpd(req, res);
+ break;
+ case CHSC_SDA:
+ ioinst_handle_chsc_sda(req, res);
+ break;
+ default:
+ ioinst_handle_chsc_unimplemented(res);
+ break;
+ }
+
+out:
+ s390_cpu_physical_memory_unmap(env, req, map_size, 1);
+ return ret;
+}
+
+int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb)
+{
+ uint64_t addr;
+ int lowcore;
+ IOIntCode *int_code;
+ hwaddr len, orig_len;
+ int ret;
+
+ trace_ioinst("tpi");
+ addr = decode_basedisp_s(env, ipb);
+ lowcore = addr ? 0 : 1;
+ len = lowcore ? 8 /* two words */ : 12 /* three words */;
+ orig_len = len;
+ int_code = s390_cpu_physical_memory_map(env, addr, &len, 1);
+ if (!int_code || (len != orig_len)) {
+ program_interrupt(env, PGM_SPECIFICATION, 2);
+ ret = -EIO;
+ goto out;
+ }
+ ret = css_do_tpi(int_code, lowcore);
+out:
+ s390_cpu_physical_memory_unmap(env, int_code, len, 1);
+ return ret;
+}
+
+#define SCHM_REG1_RES(_reg) (_reg & 0x000000000ffffffc)
+#define SCHM_REG1_MBK(_reg) ((_reg & 0x00000000f0000000) >> 28)
+#define SCHM_REG1_UPD(_reg) ((_reg & 0x0000000000000002) >> 1)
+#define SCHM_REG1_DCT(_reg) (_reg & 0x0000000000000001)
+
+int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2,
+ uint32_t ipb)
+{
+ uint8_t mbk;
+ int update;
+ int dct;
+
+ trace_ioinst("schm");
+
+ if (SCHM_REG1_RES(reg1)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+
+ mbk = SCHM_REG1_MBK(reg1);
+ update = SCHM_REG1_UPD(reg1);
+ dct = SCHM_REG1_DCT(reg1);
+
+ if (update && (reg2 & 0x0000000000000fff)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+
+ css_do_schm(mbk, update, dct, update ? reg2 : 0);
+
+ return 0;
+}
+
+int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1)
+{
+ int cssid, ssid, schid, m;
+ SubchDev *sch;
+ int ret = -ENODEV;
+ int cc;
+
+ if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ trace_ioinst_sch_id("rsch", cssid, ssid, schid);
+ sch = css_find_subch(m, cssid, ssid, schid);
+ if (sch && css_subch_visible(sch)) {
+ ret = css_do_rsch(sch);
+ }
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EINVAL:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ cc = 1;
+ break;
+ }
+
+ return cc;
+
+}
+
+#define RCHP_REG1_RES(_reg) (_reg & 0x00000000ff00ff00)
+#define RCHP_REG1_CSSID(_reg) ((_reg & 0x0000000000ff0000) >> 16)
+#define RCHP_REG1_CHPID(_reg) (_reg & 0x00000000000000ff)
+int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1)
+{
+ int cc;
+ uint8_t cssid;
+ uint8_t chpid;
+ int ret;
+
+ if (RCHP_REG1_RES(reg1)) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+
+ cssid = RCHP_REG1_CSSID(reg1);
+ chpid = RCHP_REG1_CHPID(reg1);
+
+ trace_ioinst_chp_id("rchp", cssid, chpid);
+
+ ret = css_do_rchp(cssid, chpid);
+
+ switch (ret) {
+ case -ENODEV:
+ cc = 3;
+ break;
+ case -EBUSY:
+ cc = 2;
+ break;
+ case 0:
+ cc = 0;
+ break;
+ default:
+ /* Invalid channel subsystem. */
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+
+ return cc;
+}
+
+#define SAL_REG1_INVALID(_reg) (_reg & 0x0000000080000000)
+int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1)
+{
+ /* We do not provide address limit checking, so let's suppress it. */
+ if (SAL_REG1_INVALID(reg1) || reg1 & 0x000000000000ffff) {
+ program_interrupt(env, PGM_OPERAND, 2);
+ return -EIO;
+ }
+ return 0;
+}
diff --git a/target-s390x/ioinst.h b/target-s390x/ioinst.h
new file mode 100644
index 0000000000..d5a43f4a71
--- /dev/null
+++ b/target-s390x/ioinst.h
@@ -0,0 +1,230 @@
+/*
+ * S/390 channel I/O instructions
+ *
+ * Copyright 2012 IBM Corp.
+ * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+*/
+
+#ifndef IOINST_S390X_H
+#define IOINST_S390X_H
+/*
+ * Channel I/O related definitions, as defined in the Principles
+ * Of Operation (and taken from the Linux implementation).
+ */
+
+/* subchannel status word (command mode only) */
+typedef struct SCSW {
+ uint16_t flags;
+ uint16_t ctrl;
+ uint32_t cpa;
+ uint8_t dstat;
+ uint8_t cstat;
+ uint16_t count;
+} QEMU_PACKED SCSW;
+
+#define SCSW_FLAGS_MASK_KEY 0xf000
+#define SCSW_FLAGS_MASK_SCTL 0x0800
+#define SCSW_FLAGS_MASK_ESWF 0x0400
+#define SCSW_FLAGS_MASK_CC 0x0300
+#define SCSW_FLAGS_MASK_FMT 0x0080
+#define SCSW_FLAGS_MASK_PFCH 0x0040
+#define SCSW_FLAGS_MASK_ISIC 0x0020
+#define SCSW_FLAGS_MASK_ALCC 0x0010
+#define SCSW_FLAGS_MASK_SSI 0x0008
+#define SCSW_FLAGS_MASK_ZCC 0x0004
+#define SCSW_FLAGS_MASK_ECTL 0x0002
+#define SCSW_FLAGS_MASK_PNO 0x0001
+
+#define SCSW_CTRL_MASK_FCTL 0x7000
+#define SCSW_CTRL_MASK_ACTL 0x0fe0
+#define SCSW_CTRL_MASK_STCTL 0x001f
+
+#define SCSW_FCTL_CLEAR_FUNC 0x1000
+#define SCSW_FCTL_HALT_FUNC 0x2000
+#define SCSW_FCTL_START_FUNC 0x4000
+
+#define SCSW_ACTL_SUSP 0x0020
+#define SCSW_ACTL_DEVICE_ACTIVE 0x0040
+#define SCSW_ACTL_SUBCH_ACTIVE 0x0080
+#define SCSW_ACTL_CLEAR_PEND 0x0100
+#define SCSW_ACTL_HALT_PEND 0x0200
+#define SCSW_ACTL_START_PEND 0x0400
+#define SCSW_ACTL_RESUME_PEND 0x0800
+
+#define SCSW_STCTL_STATUS_PEND 0x0001
+#define SCSW_STCTL_SECONDARY 0x0002
+#define SCSW_STCTL_PRIMARY 0x0004
+#define SCSW_STCTL_INTERMEDIATE 0x0008
+#define SCSW_STCTL_ALERT 0x0010
+
+#define SCSW_DSTAT_ATTENTION 0x80
+#define SCSW_DSTAT_STAT_MOD 0x40
+#define SCSW_DSTAT_CU_END 0x20
+#define SCSW_DSTAT_BUSY 0x10
+#define SCSW_DSTAT_CHANNEL_END 0x08
+#define SCSW_DSTAT_DEVICE_END 0x04
+#define SCSW_DSTAT_UNIT_CHECK 0x02
+#define SCSW_DSTAT_UNIT_EXCEP 0x01
+
+#define SCSW_CSTAT_PCI 0x80
+#define SCSW_CSTAT_INCORR_LEN 0x40
+#define SCSW_CSTAT_PROG_CHECK 0x20
+#define SCSW_CSTAT_PROT_CHECK 0x10
+#define SCSW_CSTAT_DATA_CHECK 0x08
+#define SCSW_CSTAT_CHN_CTRL_CHK 0x04
+#define SCSW_CSTAT_INTF_CTRL_CHK 0x02
+#define SCSW_CSTAT_CHAIN_CHECK 0x01
+
+/* path management control word */
+typedef struct PMCW {
+ uint32_t intparm;
+ uint16_t flags;
+ uint16_t devno;
+ uint8_t lpm;
+ uint8_t pnom;
+ uint8_t lpum;
+ uint8_t pim;
+ uint16_t mbi;
+ uint8_t pom;
+ uint8_t pam;
+ uint8_t chpid[8];
+ uint32_t chars;
+} QEMU_PACKED PMCW;
+
+#define PMCW_FLAGS_MASK_QF 0x8000
+#define PMCW_FLAGS_MASK_W 0x4000
+#define PMCW_FLAGS_MASK_ISC 0x3800
+#define PMCW_FLAGS_MASK_ENA 0x0080
+#define PMCW_FLAGS_MASK_LM 0x0060
+#define PMCW_FLAGS_MASK_MME 0x0018
+#define PMCW_FLAGS_MASK_MP 0x0004
+#define PMCW_FLAGS_MASK_TF 0x0002
+#define PMCW_FLAGS_MASK_DNV 0x0001
+#define PMCW_FLAGS_MASK_INVALID 0x0700
+
+#define PMCW_CHARS_MASK_ST 0x00e00000
+#define PMCW_CHARS_MASK_MBFC 0x00000004
+#define PMCW_CHARS_MASK_XMWME 0x00000002
+#define PMCW_CHARS_MASK_CSENSE 0x00000001
+#define PMCW_CHARS_MASK_INVALID 0xff1ffff8
+
+/* subchannel information block */
+typedef struct SCHIB {
+ PMCW pmcw;
+ SCSW scsw;
+ uint64_t mba;
+ uint8_t mda[4];
+} QEMU_PACKED SCHIB;
+
+/* interruption response block */
+typedef struct IRB {
+ SCSW scsw;
+ uint32_t esw[5];
+ uint32_t ecw[8];
+ uint32_t emw[8];
+} QEMU_PACKED IRB;
+
+/* operation request block */
+typedef struct ORB {
+ uint32_t intparm;
+ uint16_t ctrl0;
+ uint8_t lpm;
+ uint8_t ctrl1;
+ uint32_t cpa;
+} QEMU_PACKED ORB;
+
+#define ORB_CTRL0_MASK_KEY 0xf000
+#define ORB_CTRL0_MASK_SPND 0x0800
+#define ORB_CTRL0_MASK_STR 0x0400
+#define ORB_CTRL0_MASK_MOD 0x0200
+#define ORB_CTRL0_MASK_SYNC 0x0100
+#define ORB_CTRL0_MASK_FMT 0x0080
+#define ORB_CTRL0_MASK_PFCH 0x0040
+#define ORB_CTRL0_MASK_ISIC 0x0020
+#define ORB_CTRL0_MASK_ALCC 0x0010
+#define ORB_CTRL0_MASK_SSIC 0x0008
+#define ORB_CTRL0_MASK_C64 0x0002
+#define ORB_CTRL0_MASK_I2K 0x0001
+#define ORB_CTRL0_MASK_INVALID 0x0004
+
+#define ORB_CTRL1_MASK_ILS 0x80
+#define ORB_CTRL1_MASK_MIDAW 0x40
+#define ORB_CTRL1_MASK_ORBX 0x01
+#define ORB_CTRL1_MASK_INVALID 0x3e
+
+/* channel command word (type 1) */
+typedef struct CCW1 {
+ uint8_t cmd_code;
+ uint8_t flags;
+ uint16_t count;
+ uint32_t cda;
+} QEMU_PACKED CCW1;
+
+#define CCW_FLAG_DC 0x80
+#define CCW_FLAG_CC 0x40
+#define CCW_FLAG_SLI 0x20
+#define CCW_FLAG_SKIP 0x10
+#define CCW_FLAG_PCI 0x08
+#define CCW_FLAG_IDA 0x04
+#define CCW_FLAG_SUSPEND 0x02
+
+#define CCW_CMD_NOOP 0x03
+#define CCW_CMD_BASIC_SENSE 0x04
+#define CCW_CMD_TIC 0x08
+#define CCW_CMD_SENSE_ID 0xe4
+
+typedef struct CRW {
+ uint16_t flags;
+ uint16_t rsid;
+} QEMU_PACKED CRW;
+
+#define CRW_FLAGS_MASK_S 0x4000
+#define CRW_FLAGS_MASK_R 0x2000
+#define CRW_FLAGS_MASK_C 0x1000
+#define CRW_FLAGS_MASK_RSC 0x0f00
+#define CRW_FLAGS_MASK_A 0x0080
+#define CRW_FLAGS_MASK_ERC 0x003f
+
+#define CRW_ERC_INIT 0x02
+#define CRW_ERC_IPI 0x04
+
+#define CRW_RSC_SUBCH 0x3
+#define CRW_RSC_CHP 0x4
+
+/* I/O interruption code */
+typedef struct IOIntCode {
+ uint32_t subsys_id;
+ uint32_t intparm;
+ uint32_t interrupt_id;
+} QEMU_PACKED IOIntCode;
+
+/* schid disintegration */
+#define IOINST_SCHID_ONE(_schid) ((_schid & 0x00010000) >> 16)
+#define IOINST_SCHID_M(_schid) ((_schid & 0x00080000) >> 19)
+#define IOINST_SCHID_CSSID(_schid) ((_schid & 0xff000000) >> 24)
+#define IOINST_SCHID_SSID(_schid) ((_schid & 0x00060000) >> 17)
+#define IOINST_SCHID_NR(_schid) (_schid & 0x0000ffff)
+
+int ioinst_disassemble_sch_ident(uint32_t value, int *m, int *cssid, int *ssid,
+ int *schid);
+int ioinst_handle_xsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_csch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_hsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_msch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_ssch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_stcrw(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_stsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_tsch(CPUS390XState *env, uint64_t reg1, uint32_t ipb);
+int ioinst_handle_chsc(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_tpi(CPUS390XState *env, uint32_t ipb);
+int ioinst_handle_schm(CPUS390XState *env, uint64_t reg1, uint64_t reg2,
+ uint32_t ipb);
+int ioinst_handle_rsch(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_rchp(CPUS390XState *env, uint64_t reg1);
+int ioinst_handle_sal(CPUS390XState *env, uint64_t reg1);
+
+#endif
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index add6a58f9c..2c24182001 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -47,9 +47,29 @@
#define IPA0_DIAG 0x8300
#define IPA0_SIGP 0xae00
-#define IPA0_PRIV 0xb200
+#define IPA0_B2 0xb200
+#define IPA0_B9 0xb900
+#define IPA0_EB 0xeb00
#define PRIV_SCLP_CALL 0x20
+#define PRIV_CSCH 0x30
+#define PRIV_HSCH 0x31
+#define PRIV_MSCH 0x32
+#define PRIV_SSCH 0x33
+#define PRIV_STSCH 0x34
+#define PRIV_TSCH 0x35
+#define PRIV_TPI 0x36
+#define PRIV_SAL 0x37
+#define PRIV_RSCH 0x38
+#define PRIV_STCRW 0x39
+#define PRIV_STCPS 0x3a
+#define PRIV_RCHP 0x3b
+#define PRIV_SCHM 0x3c
+#define PRIV_CHSC 0x5f
+#define PRIV_SIGA 0x74
+#define PRIV_XSCH 0x76
+#define PRIV_SQBS 0x8a
+#define PRIV_EQBS 0x9c
#define DIAG_KVM_HYPERCALL 0x500
#define DIAG_KVM_BREAKPOINT 0x501
@@ -76,6 +96,11 @@ int kvm_arch_init(KVMState *s)
return 0;
}
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+ return cpu->cpu_index;
+}
+
int kvm_arch_init_vcpu(CPUState *cpu)
{
int ret = 0;
@@ -375,10 +400,123 @@ static int kvm_sclp_service_call(S390CPU *cpu, struct kvm_run *run,
return 0;
}
-static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
+static int kvm_handle_css_inst(S390CPU *cpu, struct kvm_run *run,
+ uint8_t ipa0, uint8_t ipa1, uint8_t ipb)
+{
+ int r = 0;
+ int no_cc = 0;
+ CPUS390XState *env = &cpu->env;
+
+ if (ipa0 != 0xb2) {
+ /* Not handled for now. */
+ return -1;
+ }
+ cpu_synchronize_state(env);
+ switch (ipa1) {
+ case PRIV_XSCH:
+ r = ioinst_handle_xsch(env, env->regs[1]);
+ break;
+ case PRIV_CSCH:
+ r = ioinst_handle_csch(env, env->regs[1]);
+ break;
+ case PRIV_HSCH:
+ r = ioinst_handle_hsch(env, env->regs[1]);
+ break;
+ case PRIV_MSCH:
+ r = ioinst_handle_msch(env, env->regs[1], run->s390_sieic.ipb);
+ break;
+ case PRIV_SSCH:
+ r = ioinst_handle_ssch(env, env->regs[1], run->s390_sieic.ipb);
+ break;
+ case PRIV_STCRW:
+ r = ioinst_handle_stcrw(env, run->s390_sieic.ipb);
+ break;
+ case PRIV_STSCH:
+ r = ioinst_handle_stsch(env, env->regs[1], run->s390_sieic.ipb);
+ break;
+ case PRIV_TSCH:
+ /* We should only get tsch via KVM_EXIT_S390_TSCH. */
+ fprintf(stderr, "Spurious tsch intercept\n");
+ break;
+ case PRIV_CHSC:
+ r = ioinst_handle_chsc(env, run->s390_sieic.ipb);
+ break;
+ case PRIV_TPI:
+ /* This should have been handled by kvm already. */
+ fprintf(stderr, "Spurious tpi intercept\n");
+ break;
+ case PRIV_SCHM:
+ no_cc = 1;
+ r = ioinst_handle_schm(env, env->regs[1], env->regs[2],
+ run->s390_sieic.ipb);
+ break;
+ case PRIV_RSCH:
+ r = ioinst_handle_rsch(env, env->regs[1]);
+ break;
+ case PRIV_RCHP:
+ r = ioinst_handle_rchp(env, env->regs[1]);
+ break;
+ case PRIV_STCPS:
+ /* We do not provide this instruction, it is suppressed. */
+ no_cc = 1;
+ r = 0;
+ break;
+ case PRIV_SAL:
+ no_cc = 1;
+ r = ioinst_handle_sal(env, env->regs[1]);
+ break;
+ default:
+ r = -1;
+ break;
+ }
+
+ if (r >= 0) {
+ if (!no_cc) {
+ setcc(cpu, r);
+ }
+ r = 0;
+ } else if (r < -1) {
+ r = 0;
+ }
+ return r;
+}
+
+static int is_ioinst(uint8_t ipa0, uint8_t ipa1, uint8_t ipb)
+{
+ int ret = 0;
+ uint16_t ipa = (ipa0 << 8) | ipa1;
+
+ switch (ipa) {
+ case IPA0_B2 | PRIV_CSCH:
+ case IPA0_B2 | PRIV_HSCH:
+ case IPA0_B2 | PRIV_MSCH:
+ case IPA0_B2 | PRIV_SSCH:
+ case IPA0_B2 | PRIV_STSCH:
+ case IPA0_B2 | PRIV_TPI:
+ case IPA0_B2 | PRIV_SAL:
+ case IPA0_B2 | PRIV_RSCH:
+ case IPA0_B2 | PRIV_STCRW:
+ case IPA0_B2 | PRIV_STCPS:
+ case IPA0_B2 | PRIV_RCHP:
+ case IPA0_B2 | PRIV_SCHM:
+ case IPA0_B2 | PRIV_CHSC:
+ case IPA0_B2 | PRIV_SIGA:
+ case IPA0_B2 | PRIV_XSCH:
+ case IPA0_B9 | PRIV_EQBS:
+ case IPA0_EB | PRIV_SQBS:
+ ret = 1;
+ break;
+ }
+
+ return ret;
+}
+
+static int handle_priv(S390CPU *cpu, struct kvm_run *run,
+ uint8_t ipa0, uint8_t ipa1)
{
int r = 0;
uint16_t ipbh0 = (run->s390_sieic.ipb & 0xffff0000) >> 16;
+ uint8_t ipb = run->s390_sieic.ipb & 0xff;
dprintf("KVM: PRIV: %d\n", ipa1);
switch (ipa1) {
@@ -386,8 +524,16 @@ static int handle_priv(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
r = kvm_sclp_service_call(cpu, run, ipbh0);
break;
default:
- dprintf("KVM: unknown PRIV: 0x%x\n", ipa1);
- r = -1;
+ if (is_ioinst(ipa0, ipa1, ipb)) {
+ r = kvm_handle_css_inst(cpu, run, ipa0, ipa1, ipb);
+ if (r == -1) {
+ setcc(cpu, 3);
+ r = 0;
+ }
+ } else {
+ dprintf("KVM: unknown PRIV: 0x%x\n", ipa1);
+ r = -1;
+ }
break;
}
@@ -528,15 +674,17 @@ static int handle_instruction(S390CPU *cpu, struct kvm_run *run)
dprintf("handle_instruction 0x%x 0x%x\n", run->s390_sieic.ipa, run->s390_sieic.ipb);
switch (ipa0) {
- case IPA0_PRIV:
- r = handle_priv(cpu, run, ipa1);
- break;
- case IPA0_DIAG:
- r = handle_diag(env, run, ipb_code);
- break;
- case IPA0_SIGP:
- r = handle_sigp(cpu, run, ipa1);
- break;
+ case IPA0_B2:
+ case IPA0_B9:
+ case IPA0_EB:
+ r = handle_priv(cpu, run, ipa0 >> 8, ipa1);
+ break;
+ case IPA0_DIAG:
+ r = handle_diag(env, run, ipb_code);
+ break;
+ case IPA0_SIGP:
+ r = handle_sigp(cpu, run, ipa1);
+ break;
}
if (r < 0) {
@@ -595,6 +743,43 @@ static int handle_intercept(S390CPU *cpu)
return r;
}
+static int handle_tsch(S390CPU *cpu)
+{
+ CPUS390XState *env = &cpu->env;
+ CPUState *cs = CPU(cpu);
+ struct kvm_run *run = cs->kvm_run;
+ int ret;
+
+ cpu_synchronize_state(env);
+ ret = ioinst_handle_tsch(env, env->regs[1], run->s390_tsch.ipb);
+ if (ret >= 0) {
+ /* Success; set condition code. */
+ setcc(cpu, ret);
+ ret = 0;
+ } else if (ret < -1) {
+ /*
+ * Failure.
+ * If an I/O interrupt had been dequeued, we have to reinject it.
+ */
+ if (run->s390_tsch.dequeued) {
+ uint16_t subchannel_id = run->s390_tsch.subchannel_id;
+ uint16_t subchannel_nr = run->s390_tsch.subchannel_nr;
+ uint32_t io_int_parm = run->s390_tsch.io_int_parm;
+ uint32_t io_int_word = run->s390_tsch.io_int_word;
+ uint32_t type = ((subchannel_id & 0xff00) << 24) |
+ ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16);
+
+ kvm_s390_interrupt_internal(cpu, type,
+ ((uint32_t)subchannel_id << 16)
+ | subchannel_nr,
+ ((uint64_t)io_int_parm << 32)
+ | io_int_word, 1);
+ }
+ ret = 0;
+ }
+ return ret;
+}
+
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
S390CPU *cpu = S390_CPU(cs);
@@ -607,6 +792,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
case KVM_EXIT_S390_RESET:
qemu_system_reset_request();
break;
+ case KVM_EXIT_S390_TSCH:
+ ret = handle_tsch(cpu);
+ break;
default:
fprintf(stderr, "Unknown KVM exit: %d\n", run->exit_reason);
break;
@@ -632,3 +820,33 @@ int kvm_arch_on_sigbus(int code, void *addr)
{
return 1;
}
+
+void kvm_s390_io_interrupt(S390CPU *cpu, uint16_t subchannel_id,
+ uint16_t subchannel_nr, uint32_t io_int_parm,
+ uint32_t io_int_word)
+{
+ uint32_t type;
+
+ type = ((subchannel_id & 0xff00) << 24) |
+ ((subchannel_id & 0x00060) << 22) | (subchannel_nr << 16);
+ kvm_s390_interrupt_internal(cpu, type,
+ ((uint32_t)subchannel_id << 16) | subchannel_nr,
+ ((uint64_t)io_int_parm << 32) | io_int_word, 1);
+}
+
+void kvm_s390_crw_mchk(S390CPU *cpu)
+{
+ kvm_s390_interrupt_internal(cpu, KVM_S390_MCHK, 1 << 28,
+ 0x00400f1d40330000, 1);
+}
+
+void kvm_s390_enable_css_support(S390CPU *cpu)
+{
+ struct kvm_enable_cap cap = {};
+ int r;
+
+ /* Activate host kernel channel subsystem support. */
+ cap.cap = KVM_CAP_S390_CSS_SUPPORT;
+ r = kvm_vcpu_ioctl(CPU(cpu), KVM_ENABLE_CAP, &cap);
+ assert(r == 0);
+}
diff --git a/target-unicore32/cpu.c b/target-unicore32/cpu.c
index 884c101010..c120440653 100644
--- a/target-unicore32/cpu.c
+++ b/target-unicore32/cpu.c
@@ -22,6 +22,22 @@ static inline void set_feature(CPUUniCore32State *env, int feature)
/* CPU models */
+static ObjectClass *uc32_cpu_class_by_name(const char *cpu_model)
+{
+ ObjectClass *oc;
+
+ if (cpu_model == NULL) {
+ return NULL;
+ }
+
+ oc = object_class_by_name(cpu_model);
+ if (oc != NULL && (!object_class_dynamic_cast(oc, TYPE_UNICORE32_CPU) ||
+ object_class_is_abstract(oc))) {
+ oc = NULL;
+ }
+ return oc;
+}
+
typedef struct UniCore32CPUInfo {
const char *name;
void (*instance_init)(Object *obj);
@@ -80,6 +96,13 @@ static void uc32_cpu_initfn(Object *obj)
tlb_flush(env, 1);
}
+static void uc32_cpu_class_init(ObjectClass *oc, void *data)
+{
+ CPUClass *cc = CPU_CLASS(oc);
+
+ cc->class_by_name = uc32_cpu_class_by_name;
+}
+
static void uc32_register_cpu_type(const UniCore32CPUInfo *info)
{
TypeInfo type_info = {
@@ -88,7 +111,7 @@ static void uc32_register_cpu_type(const UniCore32CPUInfo *info)
.instance_init = info->instance_init,
};
- type_register_static(&type_info);
+ type_register(&type_info);
}
static const TypeInfo uc32_cpu_type_info = {
@@ -98,6 +121,7 @@ static const TypeInfo uc32_cpu_type_info = {
.instance_init = uc32_cpu_initfn,
.abstract = true,
.class_size = sizeof(UniCore32CPUClass),
+ .class_init = uc32_cpu_class_init,
};
static void uc32_cpu_register_types(void)
diff --git a/target-unicore32/helper.c b/target-unicore32/helper.c
index 5359538ea5..183b5b3577 100644
--- a/target-unicore32/helper.c
+++ b/target-unicore32/helper.c
@@ -29,12 +29,14 @@ CPUUniCore32State *uc32_cpu_init(const char *cpu_model)
{
UniCore32CPU *cpu;
CPUUniCore32State *env;
+ ObjectClass *oc;
static int inited = 1;
- if (object_class_by_name(cpu_model) == NULL) {
+ oc = cpu_class_by_name(TYPE_UNICORE32_CPU, cpu_model);
+ if (oc == NULL) {
return NULL;
}
- cpu = UNICORE32_CPU(object_new(cpu_model));
+ cpu = UNICORE32_CPU(object_new(object_class_get_name(oc)));
env = &cpu->env;
if (inited) {
diff --git a/tests/.gitignore b/tests/.gitignore
index f9041f3d32..38c94ef1da 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -10,4 +10,5 @@ test-qmp-commands.h
test-qmp-commands
test-qmp-input-strict
test-qmp-marshal.c
+test-x86-cpuid
*-test
diff --git a/tests/Makefile b/tests/Makefile
index 442b286ccf..c681cebd18 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -45,6 +45,11 @@ gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c
gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c
check-unit-y += tests/test-thread-pool$(EXESUF)
gcov-files-test-thread-pool-y = thread-pool.c
+gcov-files-test-hbitmap-y = util/hbitmap.c
+check-unit-y += tests/test-hbitmap$(EXESUF)
+check-unit-y += tests/test-x86-cpuid$(EXESUF)
+# all code tested by test-x86-cpuid is inside topology.h
+gcov-files-test-x86-cpuid-y =
check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
@@ -72,12 +77,15 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
tests/test-coroutine.o tests/test-string-output-visitor.o \
tests/test-string-input-visitor.o tests/test-qmp-output-visitor.o \
tests/test-qmp-input-visitor.o tests/test-qmp-input-strict.o \
- tests/test-qmp-commands.o tests/test-visitor-serialization.o
+ tests/test-qmp-commands.o tests/test-visitor-serialization.o \
+ tests/test-x86-cpuid.o
test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o
$(test-obj-y): QEMU_INCLUDES += -Itests
+tests/test-x86-cpuid.o: QEMU_INCLUDES += -I$(SRC_PATH)/target-i386
+
tests/check-qint$(EXESUF): tests/check-qint.o libqemuutil.a
tests/check-qstring$(EXESUF): tests/check-qstring.o libqemuutil.a
tests/check-qdict$(EXESUF): tests/check-qdict.o libqemuutil.a
@@ -88,6 +96,8 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil
tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a
tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a
+tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a
+tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
tests/test-qapi-types.c tests/test-qapi-types.h :\
$(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py
diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index c6eb851871..b040820c51 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -207,6 +207,37 @@ class TestSingleDrive(ImageMirroringTestCase):
self.assertTrue(self.compare_images(test_img, target_img),
'target image does not match source after mirroring')
+ def test_small_buffer(self):
+ self.assert_no_active_mirrors()
+
+ # A small buffer is rounded up automatically
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ buf_size=4096, target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait()
+ result = self.vm.qmp('query-block')
+ self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+ self.vm.shutdown()
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
+ def test_small_buffer2(self):
+ self.assert_no_active_mirrors()
+
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,size=%d'
+ % (TestSingleDrive.image_len, TestSingleDrive.image_len), target_img)
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ buf_size=65536, mode='existing', target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait()
+ result = self.vm.qmp('query-block')
+ self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+ self.vm.shutdown()
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
def test_large_cluster(self):
self.assert_no_active_mirrors()
@@ -292,6 +323,27 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
self.assertTrue(self.compare_images(test_img, target_img),
'target image does not match source after mirroring')
+ def test_large_cluster(self):
+ self.assert_no_active_mirrors()
+
+ # qemu-img create fails if the image is not there
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'size=%d'
+ %(TestMirrorNoBacking.image_len), target_backing_img)
+ qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
+ % (TestMirrorNoBacking.image_len, target_backing_img), target_img)
+ os.remove(target_backing_img)
+
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+ mode='existing', target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ self.complete_and_wait()
+ result = self.vm.qmp('query-block')
+ self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+ self.vm.shutdown()
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
class TestReadErrors(ImageMirroringTestCase):
image_len = 2 * 1024 * 1024 # MB
@@ -330,6 +382,9 @@ new_state = "1"
'-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
% (self.blkdebug_file, backing_img),
test_img)
+ # Write something for tests that use sync='top'
+ qemu_io('-c', 'write %d 512' % (self.MIRROR_GRANULARITY + 65536),
+ test_img)
self.vm = iotests.VM().add_drive(test_img)
self.vm.launch()
@@ -383,6 +438,32 @@ new_state = "1"
self.complete_and_wait()
self.vm.shutdown()
+ def test_large_cluster(self):
+ self.assert_no_active_mirrors()
+
+ # Test COW into the target image. The first half of the
+ # cluster at MIRROR_GRANULARITY has to be copied from
+ # backing_img, even though sync='top'.
+ qemu_img('create', '-f', iotests.imgfmt, '-ocluster_size=131072,backing_file=%s' %(backing_img), target_img)
+ result = self.vm.qmp('drive-mirror', device='drive0', sync='top',
+ on_source_error='ignore',
+ mode='existing', target=target_img)
+ self.assert_qmp(result, 'return', {})
+
+ event = self.vm.get_qmp_event(wait=True)
+ self.assertEquals(event['event'], 'BLOCK_JOB_ERROR')
+ self.assert_qmp(event, 'data/device', 'drive0')
+ self.assert_qmp(event, 'data/operation', 'read')
+ result = self.vm.qmp('query-block-jobs')
+ self.assert_qmp(result, 'return[0]/paused', False)
+ self.complete_and_wait()
+ self.vm.shutdown()
+
+ # Detach blkdebug to compare images successfully
+ qemu_img('rebase', '-f', iotests.imgfmt, '-u', '-b', backing_img, test_img)
+ self.assertTrue(self.compare_images(test_img, target_img),
+ 'target image does not match source after mirroring')
+
def test_stop_read(self):
self.assert_no_active_mirrors()
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 71009c239f..84bfd63fba 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-..................
+......................
----------------------------------------------------------------------
-Ran 18 tests
+Ran 22 tests
OK
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
new file mode 100644
index 0000000000..8c902f2055
--- /dev/null
+++ b/tests/test-hbitmap.c
@@ -0,0 +1,401 @@
+/*
+ * Hierarchical bitmap unit-tests.
+ *
+ * Copyright (C) 2012 Red Hat Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include <stdarg.h>
+#include "qemu/hbitmap.h"
+
+#define LOG_BITS_PER_LONG (BITS_PER_LONG == 32 ? 5 : 6)
+
+#define L1 BITS_PER_LONG
+#define L2 (BITS_PER_LONG * L1)
+#define L3 (BITS_PER_LONG * L2)
+
+typedef struct TestHBitmapData {
+ HBitmap *hb;
+ unsigned long *bits;
+ size_t size;
+ int granularity;
+} TestHBitmapData;
+
+
+/* Check that the HBitmap and the shadow bitmap contain the same data,
+ * ignoring the same "first" bits.
+ */
+static void hbitmap_test_check(TestHBitmapData *data,
+ uint64_t first)
+{
+ uint64_t count = 0;
+ size_t pos;
+ int bit;
+ HBitmapIter hbi;
+ int64_t i, next;
+
+ hbitmap_iter_init(&hbi, data->hb, first);
+
+ i = first;
+ for (;;) {
+ next = hbitmap_iter_next(&hbi);
+ if (next < 0) {
+ next = data->size;
+ }
+
+ while (i < next) {
+ pos = i >> LOG_BITS_PER_LONG;
+ bit = i & (BITS_PER_LONG - 1);
+ i++;
+ g_assert_cmpint(data->bits[pos] & (1UL << bit), ==, 0);
+ }
+
+ if (next == data->size) {
+ break;
+ }
+
+ pos = i >> LOG_BITS_PER_LONG;
+ bit = i & (BITS_PER_LONG - 1);
+ i++;
+ count++;
+ g_assert_cmpint(data->bits[pos] & (1UL << bit), !=, 0);
+ }
+
+ if (first == 0) {
+ g_assert_cmpint(count << data->granularity, ==, hbitmap_count(data->hb));
+ }
+}
+
+/* This is provided instead of a test setup function so that the sizes
+ are kept in the test functions (and not in main()) */
+static void hbitmap_test_init(TestHBitmapData *data,
+ uint64_t size, int granularity)
+{
+ size_t n;
+ data->hb = hbitmap_alloc(size, granularity);
+
+ n = (size + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ if (n == 0) {
+ n = 1;
+ }
+ data->bits = g_new0(unsigned long, n);
+ data->size = size;
+ data->granularity = granularity;
+ if (size) {
+ hbitmap_test_check(data, 0);
+ }
+}
+
+static void hbitmap_test_teardown(TestHBitmapData *data,
+ const void *unused)
+{
+ if (data->hb) {
+ hbitmap_free(data->hb);
+ data->hb = NULL;
+ }
+ if (data->bits) {
+ g_free(data->bits);
+ data->bits = NULL;
+ }
+}
+
+/* Set a range in the HBitmap and in the shadow "simple" bitmap.
+ * The two bitmaps are then tested against each other.
+ */
+static void hbitmap_test_set(TestHBitmapData *data,
+ uint64_t first, uint64_t count)
+{
+ hbitmap_set(data->hb, first, count);
+ while (count-- != 0) {
+ size_t pos = first >> LOG_BITS_PER_LONG;
+ int bit = first & (BITS_PER_LONG - 1);
+ first++;
+
+ data->bits[pos] |= 1UL << bit;
+ }
+
+ if (data->granularity == 0) {
+ hbitmap_test_check(data, 0);
+ }
+}
+
+/* Reset a range in the HBitmap and in the shadow "simple" bitmap.
+ */
+static void hbitmap_test_reset(TestHBitmapData *data,
+ uint64_t first, uint64_t count)
+{
+ hbitmap_reset(data->hb, first, count);
+ while (count-- != 0) {
+ size_t pos = first >> LOG_BITS_PER_LONG;
+ int bit = first & (BITS_PER_LONG - 1);
+ first++;
+
+ data->bits[pos] &= ~(1UL << bit);
+ }
+
+ if (data->granularity == 0) {
+ hbitmap_test_check(data, 0);
+ }
+}
+
+static void hbitmap_test_check_get(TestHBitmapData *data)
+{
+ uint64_t count = 0;
+ uint64_t i;
+
+ for (i = 0; i < data->size; i++) {
+ size_t pos = i >> LOG_BITS_PER_LONG;
+ int bit = i & (BITS_PER_LONG - 1);
+ unsigned long val = data->bits[pos] & (1UL << bit);
+ count += hbitmap_get(data->hb, i);
+ g_assert_cmpint(hbitmap_get(data->hb, i), ==, val != 0);
+ }
+ g_assert_cmpint(count, ==, hbitmap_count(data->hb));
+}
+
+static void test_hbitmap_zero(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, 0, 0);
+}
+
+static void test_hbitmap_unaligned(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3 + 23, 0);
+ hbitmap_test_set(data, 0, 1);
+ hbitmap_test_set(data, L3 + 22, 1);
+}
+
+static void test_hbitmap_iter_empty(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L1, 0);
+}
+
+static void test_hbitmap_iter_partial(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3, 0);
+ hbitmap_test_set(data, 0, L3);
+ hbitmap_test_check(data, 1);
+ hbitmap_test_check(data, L1 - 1);
+ hbitmap_test_check(data, L1);
+ hbitmap_test_check(data, L1 * 2 - 1);
+ hbitmap_test_check(data, L2 - 1);
+ hbitmap_test_check(data, L2);
+ hbitmap_test_check(data, L2 + 1);
+ hbitmap_test_check(data, L2 + L1);
+ hbitmap_test_check(data, L2 + L1 * 2 - 1);
+ hbitmap_test_check(data, L2 * 2 - 1);
+ hbitmap_test_check(data, L2 * 2);
+ hbitmap_test_check(data, L2 * 2 + 1);
+ hbitmap_test_check(data, L2 * 2 + L1);
+ hbitmap_test_check(data, L2 * 2 + L1 * 2 - 1);
+ hbitmap_test_check(data, L3 / 2);
+}
+
+static void test_hbitmap_set_all(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3, 0);
+ hbitmap_test_set(data, 0, L3);
+}
+
+static void test_hbitmap_get_all(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3, 0);
+ hbitmap_test_set(data, 0, L3);
+ hbitmap_test_check_get(data);
+}
+
+static void test_hbitmap_get_some(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, 2 * L2, 0);
+ hbitmap_test_set(data, 10, 1);
+ hbitmap_test_check_get(data);
+ hbitmap_test_set(data, L1 - 1, 1);
+ hbitmap_test_check_get(data);
+ hbitmap_test_set(data, L1, 1);
+ hbitmap_test_check_get(data);
+ hbitmap_test_set(data, L2 - 1, 1);
+ hbitmap_test_check_get(data);
+ hbitmap_test_set(data, L2, 1);
+ hbitmap_test_check_get(data);
+}
+
+static void test_hbitmap_set_one(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, 2 * L2, 0);
+ hbitmap_test_set(data, 10, 1);
+ hbitmap_test_set(data, L1 - 1, 1);
+ hbitmap_test_set(data, L1, 1);
+ hbitmap_test_set(data, L2 - 1, 1);
+ hbitmap_test_set(data, L2, 1);
+}
+
+static void test_hbitmap_set_two_elem(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, 2 * L2, 0);
+ hbitmap_test_set(data, L1 - 1, 2);
+ hbitmap_test_set(data, L1 * 2 - 1, 4);
+ hbitmap_test_set(data, L1 * 4, L1 + 1);
+ hbitmap_test_set(data, L1 * 8 - 1, L1 + 1);
+ hbitmap_test_set(data, L2 - 1, 2);
+ hbitmap_test_set(data, L2 + L1 - 1, 8);
+ hbitmap_test_set(data, L2 + L1 * 4, L1 + 1);
+ hbitmap_test_set(data, L2 + L1 * 8 - 1, L1 + 1);
+}
+
+static void test_hbitmap_set(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3 * 2, 0);
+ hbitmap_test_set(data, L1 - 1, L1 + 2);
+ hbitmap_test_set(data, L1 * 3 - 1, L1 + 2);
+ hbitmap_test_set(data, L1 * 5, L1 * 2 + 1);
+ hbitmap_test_set(data, L1 * 8 - 1, L1 * 2 + 1);
+ hbitmap_test_set(data, L2 - 1, L1 + 2);
+ hbitmap_test_set(data, L2 + L1 * 2 - 1, L1 + 2);
+ hbitmap_test_set(data, L2 + L1 * 4, L1 * 2 + 1);
+ hbitmap_test_set(data, L2 + L1 * 7 - 1, L1 * 2 + 1);
+ hbitmap_test_set(data, L2 * 2 - 1, L3 * 2 - L2 * 2);
+}
+
+static void test_hbitmap_set_twice(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L1 * 3, 0);
+ hbitmap_test_set(data, 0, L1 * 3);
+ hbitmap_test_set(data, L1, 1);
+}
+
+static void test_hbitmap_set_overlap(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3 * 2, 0);
+ hbitmap_test_set(data, L1 - 1, L1 + 2);
+ hbitmap_test_set(data, L1 * 2 - 1, L1 * 2 + 2);
+ hbitmap_test_set(data, 0, L1 * 3);
+ hbitmap_test_set(data, L1 * 8 - 1, L2);
+ hbitmap_test_set(data, L2, L1);
+ hbitmap_test_set(data, L2 - L1 - 1, L1 * 8 + 2);
+ hbitmap_test_set(data, L2, L3 - L2 + 1);
+ hbitmap_test_set(data, L3 - L1, L1 * 3);
+ hbitmap_test_set(data, L3 - 1, 3);
+ hbitmap_test_set(data, L3 - 1, L2);
+}
+
+static void test_hbitmap_reset_empty(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3, 0);
+ hbitmap_test_reset(data, 0, L3);
+}
+
+static void test_hbitmap_reset(TestHBitmapData *data,
+ const void *unused)
+{
+ hbitmap_test_init(data, L3 * 2, 0);
+ hbitmap_test_set(data, L1 - 1, L1 + 2);
+ hbitmap_test_reset(data, L1 * 2 - 1, L1 * 2 + 2);
+ hbitmap_test_set(data, 0, L1 * 3);
+ hbitmap_test_reset(data, L1 * 8 - 1, L2);
+ hbitmap_test_set(data, L2, L1);
+ hbitmap_test_reset(data, L2 - L1 - 1, L1 * 8 + 2);
+ hbitmap_test_set(data, L2, L3 - L2 + 1);
+ hbitmap_test_reset(data, L3 - L1, L1 * 3);
+ hbitmap_test_set(data, L3 - 1, 3);
+ hbitmap_test_reset(data, L3 - 1, L2);
+ hbitmap_test_set(data, 0, L3 * 2);
+ hbitmap_test_reset(data, 0, L1);
+ hbitmap_test_reset(data, 0, L2);
+ hbitmap_test_reset(data, L3, L3);
+ hbitmap_test_set(data, L3 / 2, L3);
+}
+
+static void test_hbitmap_granularity(TestHBitmapData *data,
+ const void *unused)
+{
+ /* Note that hbitmap_test_check has to be invoked manually in this test. */
+ hbitmap_test_init(data, L1, 1);
+ hbitmap_test_set(data, 0, 1);
+ g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
+ hbitmap_test_check(data, 0);
+ hbitmap_test_set(data, 2, 1);
+ g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
+ hbitmap_test_check(data, 0);
+ hbitmap_test_set(data, 0, 3);
+ g_assert_cmpint(hbitmap_count(data->hb), ==, 4);
+ hbitmap_test_reset(data, 0, 1);
+ g_assert_cmpint(hbitmap_count(data->hb), ==, 2);
+}
+
+static void test_hbitmap_iter_granularity(TestHBitmapData *data,
+ const void *unused)
+{
+ HBitmapIter hbi;
+
+ /* Note that hbitmap_test_check has to be invoked manually in this test. */
+ hbitmap_test_init(data, 131072 << 7, 7);
+ hbitmap_iter_init(&hbi, data->hb, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+ hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
+ hbitmap_iter_init(&hbi, data->hb, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+ hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+ hbitmap_test_set(data, (131072 << 7) - 8, 8);
+ hbitmap_iter_init(&hbi, data->hb, 0);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+
+ hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
+ g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
+}
+
+static void hbitmap_test_add(const char *testpath,
+ void (*test_func)(TestHBitmapData *data, const void *user_data))
+{
+ g_test_add(testpath, TestHBitmapData, NULL, NULL, test_func,
+ hbitmap_test_teardown);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+ hbitmap_test_add("/hbitmap/size/0", test_hbitmap_zero);
+ hbitmap_test_add("/hbitmap/size/unaligned", test_hbitmap_unaligned);
+ hbitmap_test_add("/hbitmap/iter/empty", test_hbitmap_iter_empty);
+ hbitmap_test_add("/hbitmap/iter/partial", test_hbitmap_iter_partial);
+ hbitmap_test_add("/hbitmap/iter/granularity", test_hbitmap_iter_granularity);
+ hbitmap_test_add("/hbitmap/get/all", test_hbitmap_get_all);
+ hbitmap_test_add("/hbitmap/get/some", test_hbitmap_get_some);
+ hbitmap_test_add("/hbitmap/set/all", test_hbitmap_set_all);
+ hbitmap_test_add("/hbitmap/set/one", test_hbitmap_set_one);
+ hbitmap_test_add("/hbitmap/set/two-elem", test_hbitmap_set_two_elem);
+ hbitmap_test_add("/hbitmap/set/general", test_hbitmap_set);
+ hbitmap_test_add("/hbitmap/set/twice", test_hbitmap_set_twice);
+ hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap);
+ hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty);
+ hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset);
+ hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity);
+ g_test_run();
+
+ return 0;
+}
diff --git a/tests/test-x86-cpuid.c b/tests/test-x86-cpuid.c
new file mode 100644
index 0000000000..8d9f96a113
--- /dev/null
+++ b/tests/test-x86-cpuid.c
@@ -0,0 +1,110 @@
+/*
+ * Test code for x86 CPUID and Topology functions
+ *
+ * Copyright (c) 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <glib.h>
+
+#include "topology.h"
+
+static void test_topo_bits(void)
+{
+ /* simple tests for 1 thread per core, 1 core per socket */
+ g_assert_cmpuint(apicid_smt_width(1, 1), ==, 0);
+ g_assert_cmpuint(apicid_core_width(1, 1), ==, 0);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 0), ==, 0);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 1), ==, 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 2), ==, 2);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(1, 1, 3), ==, 3);
+
+
+ /* Test field width calculation for multiple values
+ */
+ g_assert_cmpuint(apicid_smt_width(1, 2), ==, 1);
+ g_assert_cmpuint(apicid_smt_width(1, 3), ==, 2);
+ g_assert_cmpuint(apicid_smt_width(1, 4), ==, 2);
+
+ g_assert_cmpuint(apicid_smt_width(1, 14), ==, 4);
+ g_assert_cmpuint(apicid_smt_width(1, 15), ==, 4);
+ g_assert_cmpuint(apicid_smt_width(1, 16), ==, 4);
+ g_assert_cmpuint(apicid_smt_width(1, 17), ==, 5);
+
+
+ g_assert_cmpuint(apicid_core_width(30, 2), ==, 5);
+ g_assert_cmpuint(apicid_core_width(31, 2), ==, 5);
+ g_assert_cmpuint(apicid_core_width(32, 2), ==, 5);
+ g_assert_cmpuint(apicid_core_width(33, 2), ==, 6);
+
+
+ /* build a weird topology and see if IDs are calculated correctly
+ */
+
+ /* This will use 2 bits for thread ID and 3 bits for core ID
+ */
+ g_assert_cmpuint(apicid_smt_width(6, 3), ==, 2);
+ g_assert_cmpuint(apicid_core_width(6, 3), ==, 3);
+ g_assert_cmpuint(apicid_pkg_offset(6, 3), ==, 5);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 0), ==, 0);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1), ==, 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2), ==, 2);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 0), ==,
+ (1 << 2) | 0);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 1), ==,
+ (1 << 2) | 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 3 + 2), ==,
+ (1 << 2) | 2);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 0), ==,
+ (2 << 2) | 0);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 1), ==,
+ (2 << 2) | 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 2 * 3 + 2), ==,
+ (2 << 2) | 2);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 0), ==,
+ (5 << 2) | 0);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 1), ==,
+ (5 << 2) | 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 5 * 3 + 2), ==,
+ (5 << 2) | 2);
+
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 0 * 3 + 0), ==,
+ (1 << 5));
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 1 * 6 * 3 + 1 * 3 + 1), ==,
+ (1 << 5) | (1 << 2) | 1);
+ g_assert_cmpuint(x86_apicid_from_cpu_idx(6, 3, 3 * 6 * 3 + 5 * 3 + 2), ==,
+ (3 << 5) | (5 << 2) | 2);
+}
+
+int main(int argc, char **argv)
+{
+ g_test_init(&argc, &argv, NULL);
+
+ g_test_add_func("/cpuid/topology/basic", test_topo_bits);
+
+ g_test_run();
+
+ return 0;
+}
diff --git a/trace-events b/trace-events
index 09091e6d17..1011f27676 100644
--- a/trace-events
+++ b/trace-events
@@ -79,10 +79,17 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
# block/mirror.c
mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_flush(void *s) "s %p"
mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_cow(void *s, int64_t sector_num) "s %p sector_num %"PRId64
+mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
+mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
+mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
# blockdev.c
qmp_block_job_cancel(void *job) "job %p"
@@ -1060,3 +1067,26 @@ xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
xics_ics_eoi(int nr) "ics_eoi: irq %#x"
+
+# hbitmap.c
+hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
+hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+hbitmap_set(void *hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
+
+# target-s390x/ioinst.c
+ioinst(const char *insn) "IOINST: %s"
+ioinst_sch_id(const char *insn, int cssid, int ssid, int schid) "IOINST: %s (%x.%x.%04x)"
+ioinst_chp_id(const char *insn, int cssid, int chpid) "IOINST: %s (%x.%02x)"
+ioinst_chsc_cmd(uint16_t cmd, uint16_t len) "IOINST: chsc command %04x, len %04x"
+
+# hw/s390x/css.c
+css_enable_facility(const char *facility) "CSS: enable %s"
+css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char *chained) "CSS: queueing crw: rsc=%x, erc=%x, rsid=%x %s"
+css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type) "CSS: add chpid %x.%02x (type %02x)"
+css_new_image(uint8_t cssid, const char *default_cssid) "CSS: add css image %02x %s"
+css_assign_subch(const char *do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno) "CSS: %s %x.%x.%04x (devno %04x)"
+css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char *conditional) "CSS: I/O interrupt on sch %x.%x.%04x (intparm %08x, isc %x) %s"
+
+# hw/s390x/virtio-ccw.c
+virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command %x"
+virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno %04x (%s)"
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 5baeb53af6..495a178557 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -2,7 +2,7 @@ util-obj-y = osdep.o cutils.o qemu-timer-common.o
util-obj-$(CONFIG_WIN32) += oslib-win32.o qemu-thread-win32.o event_notifier-win32.o
util-obj-$(CONFIG_POSIX) += oslib-posix.o qemu-thread-posix.o event_notifier-posix.o
util-obj-y += envlist.o path.o host-utils.o cache-utils.o module.o
-util-obj-y += bitmap.o bitops.o
+util-obj-y += bitmap.o bitops.o hbitmap.o
util-obj-y += acl.o
util-obj-y += error.o qemu-error.o
util-obj-$(CONFIG_POSIX) += compatfd.o
diff --git a/util/hbitmap.c b/util/hbitmap.c
new file mode 100644
index 0000000000..2aa487db74
--- /dev/null
+++ b/util/hbitmap.c
@@ -0,0 +1,401 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* HBitmaps provides an array of bits. The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level. When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word
+ * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits. To move down, you shift the index left
+ * similarly, and add the word index within the group. Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once. Hence, The total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps. Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+ /* Number of total bits in the bottom level. */
+ uint64_t size;
+
+ /* Number of set bits in the bottom level. */
+ uint64_t count;
+
+ /* A scaling factor. Given a granularity of G, each bit in the bitmap will
+ * will actually represent a group of 2^G elements. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire page; iteration will only visit the first
+ * bit of each group. Here is an example of operations in a size-16,
+ * granularity-1 HBitmap:
+ *
+ * initial state 00000000
+ * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8)
+ * reset(start=1, count=3) 00111000 (iter: 4, 6, 8)
+ * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10)
+ * reset(start=5, count=5) 00000000
+ *
+ * From an implementation point of view, when setting or resetting bits,
+ * the bitmap will scale bit numbers right by this amount of bits. When
+ * iterating, the bitmap will scale bit numbers left by this amount of
+ * bits.
+ */
+ int granularity;
+
+ /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+ * coarsest). Each bit in level N represents a word in level N+1 that
+ * has a set bit, except the last level where each bit represents the
+ * actual bitmap.
+ *
+ * Note that all bitmaps have the same number of levels. Even a 1-bit
+ * bitmap will still allocate HBITMAP_LEVELS arrays.
+ */
+ unsigned long *levels[HBITMAP_LEVELS];
+};
+
+static inline int popcountl(unsigned long l)
+{
+ return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l);
+}
+
+/* Advance hbi to the next nonzero word and return it. hbi->pos
+ * is updated. Returns zero if we reach the end of the bitmap.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+ size_t pos = hbi->pos;
+ const HBitmap *hb = hbi->hb;
+ unsigned i = HBITMAP_LEVELS - 1;
+
+ unsigned long cur;
+ do {
+ cur = hbi->cur[--i];
+ pos >>= BITS_PER_LEVEL;
+ } while (cur == 0);
+
+ /* Check for end of iteration. We always use fewer than BITS_PER_LONG
+ * bits in the level 0 bitmap; thus we can repurpose the most significant
+ * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures
+ * that the above loop ends even without an explicit check on i.
+ */
+
+ if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+ return 0;
+ }
+ for (; i < HBITMAP_LEVELS - 1; i++) {
+ /* Shift back pos to the left, matching the right shifts above.
+ * The index of this word's least significant set bit provides
+ * the low-order bits.
+ */
+ pos = (pos << BITS_PER_LEVEL) + ffsl(cur) - 1;
+ hbi->cur[i] = cur & (cur - 1);
+
+ /* Set up next level for iteration. */
+ cur = hb->levels[i + 1][pos];
+ }
+
+ hbi->pos = pos;
+ trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+ assert(cur);
+ return cur;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+ unsigned i, bit;
+ uint64_t pos;
+
+ hbi->hb = hb;
+ pos = first >> hb->granularity;
+ assert(pos < hb->size);
+ hbi->pos = pos >> BITS_PER_LEVEL;
+ hbi->granularity = hb->granularity;
+
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ bit = pos & (BITS_PER_LONG - 1);
+ pos >>= BITS_PER_LEVEL;
+
+ /* Drop bits representing items before first. */
+ hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+ /* We have already added level i+1, so the lowest set bit has
+ * been processed. Clear it.
+ */
+ if (i != HBITMAP_LEVELS - 1) {
+ hbi->cur[i] &= ~(1UL << bit);
+ }
+ }
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+ return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+ return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+ return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity. Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+ HBitmapIter hbi;
+ uint64_t count = 0;
+ uint64_t end = last + 1;
+ unsigned long cur;
+ size_t pos;
+
+ hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+ for (;;) {
+ pos = hbitmap_iter_next_word(&hbi, &cur);
+ if (pos >= (end >> BITS_PER_LEVEL)) {
+ break;
+ }
+ count += popcountl(cur);
+ }
+
+ if (pos == (end >> BITS_PER_LEVEL)) {
+ /* Drop bits representing the END-th and subsequent items. */
+ int bit = end & (BITS_PER_LONG - 1);
+ cur &= (1UL << bit) - 1;
+ count += popcountl(cur);
+ }
+
+ return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool changed;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ changed = (*elem == 0);
+ *elem |= mask;
+ return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+ changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] == 0);
+ hb->levels[level][i] = ~0UL;
+ }
+ }
+ changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+ /* If there was any change in this layer, we may have to update
+ * the one above.
+ */
+ if (level > 0 && changed) {
+ hb_set_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_set(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+ count = last - start + 1;
+
+ hb->count += count - hb_count_between(hb, start, last);
+ hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool blanked;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ blanked = *elem != 0 && ((*elem & ~mask) == 0);
+ *elem &= ~mask;
+ return blanked;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+ /* Here we need a more complex test than when setting bits. Even if
+ * something was changed, we must not blank bits in the upper level
+ * unless the lower-level word became entirely zero. So, remove pos
+ * from the upper-level range if bits remain set.
+ */
+ if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+ changed = true;
+ } else {
+ pos++;
+ }
+
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] != 0);
+ hb->levels[level][i] = 0UL;
+ }
+ }
+
+ /* Same as above, this time for lastpos. */
+ if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+ changed = true;
+ } else {
+ lastpos--;
+ }
+
+ if (level > 0 && changed) {
+ hb_reset_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_reset(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+
+ hb->count -= hb_count_between(hb, start, last);
+ hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+ /* Compute position and bit in the last layer. */
+ uint64_t pos = item >> hb->granularity;
+ unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+
+ return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
+}
+
+void hbitmap_free(HBitmap *hb)
+{
+ unsigned i;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ g_free(hb->levels[i]);
+ }
+ g_free(hb);
+}
+
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+ HBitmap *hb = g_malloc0(sizeof (struct HBitmap));
+ unsigned i;
+
+ assert(granularity >= 0 && granularity < 64);
+ size = (size + (1ULL << granularity) - 1) >> granularity;
+ assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+ hb->size = size;
+ hb->granularity = granularity;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+ hb->levels[i] = g_malloc0(size * sizeof(unsigned long));
+ }
+
+ /* We necessarily have free bits in level 0 due to the definition
+ * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up
+ * hbitmap_iter_skip_words.
+ */
+ assert(size == 1);
+ hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+ return hb;
+}
diff --git a/vl.c b/vl.c
index 7aab73b736..910abb6526 100644
--- a/vl.c
+++ b/vl.c
@@ -176,6 +176,7 @@ int main(int argc, char **argv)
#define DEFAULT_RAM_SIZE 128
#define MAX_VIRTIO_CONSOLES 1
+#define MAX_SCLP_CONSOLES 1
static const char *data_dir;
const char *bios_name = NULL;
@@ -203,6 +204,7 @@ int no_quit = 0;
CharDriverState *serial_hds[MAX_SERIAL_PORTS];
CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
+CharDriverState *sclp_hds[MAX_SCLP_CONSOLES];
int win2k_install_hack = 0;
int singlestep = 0;
int smp_cpus = 1;
@@ -271,6 +273,7 @@ static int tcg_tb_size;
static int default_serial = 1;
static int default_parallel = 1;
static int default_virtcon = 1;
+static int default_sclp = 1;
static int default_monitor = 1;
static int default_floppy = 1;
static int default_cdrom = 1;
@@ -2340,6 +2343,7 @@ struct device_config {
DEV_VIRTCON, /* -virtioconsole */
DEV_DEBUGCON, /* -debugcon */
DEV_GDB, /* -gdb, -s */
+ DEV_SCLP, /* s390 sclp */
} type;
const char *cmdline;
Location loc;
@@ -2458,6 +2462,39 @@ static int virtcon_parse(const char *devname)
return 0;
}
+static int sclp_parse(const char *devname)
+{
+ QemuOptsList *device = qemu_find_opts("device");
+ static int index = 0;
+ char label[32];
+ QemuOpts *dev_opts;
+
+ if (strcmp(devname, "none") == 0) {
+ return 0;
+ }
+ if (index == MAX_SCLP_CONSOLES) {
+ fprintf(stderr, "qemu: too many sclp consoles\n");
+ exit(1);
+ }
+
+ assert(arch_type == QEMU_ARCH_S390X);
+
+ dev_opts = qemu_opts_create(device, NULL, 0, NULL);
+ qemu_opt_set(dev_opts, "driver", "sclpconsole");
+
+ snprintf(label, sizeof(label), "sclpcon%d", index);
+ sclp_hds[index] = qemu_chr_new(label, devname, NULL);
+ if (!sclp_hds[index]) {
+ fprintf(stderr, "qemu: could not connect sclp console"
+ " to character backend '%s'\n", devname);
+ return -1;
+ }
+ qemu_opt_set(dev_opts, "chardev", label);
+
+ index++;
+ return 0;
+}
+
static int debugcon_parse(const char *devname)
{
QemuOpts *opts;
@@ -3615,6 +3652,7 @@ int main(int argc, char **argv, char **envp)
default_serial = 0;
default_parallel = 0;
default_virtcon = 0;
+ default_sclp = 0;
default_monitor = 0;
default_net = 0;
default_floppy = 0;
@@ -3832,6 +3870,9 @@ int main(int argc, char **argv, char **envp)
if (!machine->use_virtcon) {
default_virtcon = 0;
}
+ if (!machine->use_sclp) {
+ default_sclp = 0;
+ }
if (machine->no_floppy) {
default_floppy = 0;
}
@@ -3873,11 +3914,16 @@ int main(int argc, char **argv, char **envp)
add_device_config(DEV_SERIAL, "mon:stdio");
} else if (default_virtcon && default_monitor) {
add_device_config(DEV_VIRTCON, "mon:stdio");
+ } else if (default_sclp && default_monitor) {
+ add_device_config(DEV_SCLP, "mon:stdio");
} else {
if (default_serial)
add_device_config(DEV_SERIAL, "stdio");
if (default_virtcon)
add_device_config(DEV_VIRTCON, "stdio");
+ if (default_sclp) {
+ add_device_config(DEV_SCLP, "stdio");
+ }
if (default_monitor)
monitor_parse("stdio", "readline");
}
@@ -3890,6 +3936,9 @@ int main(int argc, char **argv, char **envp)
monitor_parse("vc:80Cx24C", "readline");
if (default_virtcon)
add_device_config(DEV_VIRTCON, "vc:80Cx24C");
+ if (default_sclp) {
+ add_device_config(DEV_SCLP, "vc:80Cx24C");
+ }
}
socket_init();
@@ -4060,6 +4109,9 @@ int main(int argc, char **argv, char **envp)
exit(1);
if (foreach_device_config(DEV_VIRTCON, virtcon_parse) < 0)
exit(1);
+ if (foreach_device_config(DEV_SCLP, sclp_parse) < 0) {
+ exit(1);
+ }
if (foreach_device_config(DEV_DEBUGCON, debugcon_parse) < 0)
exit(1);