Merge remote-tracking branch 'remotes/borntraeger/tags/kvm-s390-20140901' into staging

s390x/kvm: Several updates/fixes/features 1. s390x/kvm: avoid synchronize_rcu's in kernel ---------------------------------------------- The first patches change s390x/kvm code to issue VCPU specific ioctls from the VCPU thread. This will avoid unnecessary synchronize_rcu in the kernel, which caused a noticeable slowdown with many guest CPUs. It speeds up all start/restart/reset operations involving cpus drastically. 2. s390-ccw.img: block size and DASD format support --------------------------------------------------- The second part changes the s390-ccw bios to IPL (boot) more disk formats than before. Furthermore, a small fix is made to the console output of the bios. 3. s390: Support for Hotplug of Standby Memory ---------------------------------------------- The third part adds support in s390 for a pool of standby memory, which can be set online/offline by the guest (i.e., via chmem). The standby pool of memory is allocated as the difference between the initial memory setting and the maxmem setting. As part of this work, additional results are provided for the Read SCP Information SCLP, and a new implementation is added for the Read Storage Element Information, Attach Storage Element, Assign Storage and Unassign Storage SCLPs, which enables the s390 guest to manipulate the standby memory pool. This patchset is based on work originally done by Jeng-Fang (Nick) Wang. Sample qemu command snippet: qemu -machine s390-ccw-virtio -m 1024M,maxmem=2048M,slots=32 -enable-kvm This will allocate 1024M of active memory, and another 1024M of standby memory. 
Example output from s390-tools lsmem: ============================================================================= 0x0000000000000000-0x000000000fffffff 256 online no 0-127 0x0000000010000000-0x000000001fffffff 256 online yes 128-255 0x0000000020000000-0x000000003fffffff 512 online no 256-511 0x0000000040000000-0x000000007fffffff 1024 offline - 512-1023 Memory device size : 2 MB Memory block size : 256 MB Total online memory : 1024 MB Total offline memory: 1024 MB The guest can dynamically enable part or all of the standby pool via the s390-tools chmem, for example: chmem -e 512M And can attempt to dynamically disable: chmem -d 512M 4. s390x/gdb: various fixes --------------------------- * Patch 1 fixes a bug where the cc was changed accidentally. * Patch 2 adds the gdb feature XML files for s390x * Patch 3 Define acr and fpr registers as coprocessor registers. This allows us to reuse the feature XML files. * Patch 4 whitespace fixes # gpg: Signature made Mon 01 Sep 2014 12:53:39 BST using RSA key ID B5A61C7C # gpg: Can't check signature: public key not found * remotes/borntraeger/tags/kvm-s390-20140901: s390x/gdb: coding style fixes s390x/gdb: generate target.xml and handle fp/ac as coprocessors s390x/gdb: add the feature xml files for s390x s390x/gdb: don't touch the cc if tcg is not enabled sclp-s390: Add memory hotplug SCLPs s390-virtio: Apply same memory boundaries as virtio-ccw virtio-ccw: Include standby memory when calculating storage increment sclp-s390: Add device to manage s390 memory hotplug pc-bios/s390-ccw.img binary update pc-bios/s390-ccw: Do proper console setup pc-bios/s390-ccw: IPL from DASD with format variations pc-bios/s390-ccw Really big EAV ECKD DASD handling pc-bios/s390-ccw Improve ECKD informational message pc-bios/s390-ccw: handle more ECKD DASD block sizes pc-bios/s390-ccw: support all virtio block size s390x/kvm: execute the first cpu reset on the vcpu thread s390x/kvm: execute "system reset" cpu resets on the vcpu thread s390x/kvm: 
execute sigp orders on the target vcpu thread s390x/kvm: run guest triggered resets on the target vcpu thread Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2014-09-01 13:57:45 +0100
committer: Peter Maydell <peter.maydell@linaro.org> 2014-09-01 13:57:46 +0100
commit: 5cd1475d28fc6e3d617e6cc47ab7c8050cf7fa40 (patch)
tree: 99e70b5281c462f367a79b6c4520b00602ce0f5a /hw
parent: 988f46361465db0d4fce50e71fa0ff8f9d20483e (diff)
parent: 218829db2303e3d61f901f1d12fd4f7cd03644e1 (diff)
3 files changed, 330 insertions, 20 deletions
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 004b2c20c5..e538b1f686 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -17,6 +17,7 @@
 #include "ioinst.h"
 #include "css.h"
 #include "virtio-ccw.h"
+#include "qemu/config-file.h"
 
 #define TYPE_S390_CCW_MACHINE               "s390-ccw-machine"
 
@@ -86,17 +87,35 @@ static void ccw_init(MachineState *machine)
     ram_addr_t my_ram_size = machine->ram_size;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
-    int shift = 0;
+    sclpMemoryHotplugDev *mhd = init_sclp_memory_hotplug_dev();
     uint8_t *storage_keys;
     int ret;
     VirtualCssBus *css_bus;
-
-    /* s390x ram size detection needs a 16bit multiplier + an increment. So
-       guests > 64GB can be specified in 2MB steps etc. */
-    while ((my_ram_size >> (20 + shift)) > 65535) {
-        shift++;
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL);
+    ram_addr_t pad_size = 0;
+    ram_addr_t maxmem = qemu_opt_get_size(opts, "maxmem", my_ram_size);
+    ram_addr_t standby_mem_size = maxmem - my_ram_size;
+
+    /* The storage increment size is a multiple of 1M and is a power of 2.
+     * The number of storage increments must be MAX_STORAGE_INCREMENTS or fewer.
+     * The variable 'mhd->increment_size' is an exponent of 2 that can be
+     * used to calculate the size (in bytes) of an increment. */
+    mhd->increment_size = 20;
+    while ((my_ram_size >> mhd->increment_size) > MAX_STORAGE_INCREMENTS) {
+        mhd->increment_size++;
+    }
+    while ((standby_mem_size >> mhd->increment_size) > MAX_STORAGE_INCREMENTS) {
+        mhd->increment_size++;
     }
-    my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
+
+    /* The core and standby memory areas need to be aligned with
+     * the increment size.  In effect, this can cause the
+     * user-specified memory size to be rounded down to align
+     * with the nearest increment boundary. */
+    standby_mem_size = standby_mem_size >> mhd->increment_size
+                                        << mhd->increment_size;
+    my_ram_size = my_ram_size >> mhd->increment_size
+                              << mhd->increment_size;
 
     /* let's propagate the changed ram size into the global variable. */
     ram_size = my_ram_size;
@@ -111,11 +130,22 @@ static void ccw_init(MachineState *machine)
     /* register hypercalls */
     virtio_ccw_register_hcalls();
 
-    /* allocate RAM */
+    /* allocate RAM for core */
     memory_region_init_ram(ram, NULL, "s390.ram", my_ram_size);
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(sysmem, 0, ram);
 
+    /* If the size of ram is not on a MEM_SECTION_SIZE boundary,
+       calculate the pad size necessary to force this boundary. */
+    if (standby_mem_size) {
+        if (my_ram_size % MEM_SECTION_SIZE) {
+            pad_size = MEM_SECTION_SIZE - my_ram_size % MEM_SECTION_SIZE;
+        }
+        my_ram_size += standby_mem_size + pad_size;
+        mhd->pad_size = pad_size;
+        mhd->standby_mem_size = standby_mem_size;
+    }
+
     /* allocate storage keys */
     storage_keys = g_malloc0(my_ram_size / TARGET_PAGE_SIZE);
 
diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
index 1a75a1cf81..4ca52b7190 100644
--- a/hw/s390x/s390-virtio.c
+++ b/hw/s390x/s390-virtio.c
@@ -230,18 +230,21 @@ static void s390_init(MachineState *machine)
     ram_addr_t my_ram_size = machine->ram_size;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
-    int shift = 0;
+    int increment_size = 20;
     uint8_t *storage_keys;
     void *virtio_region;
     hwaddr virtio_region_len;
     hwaddr virtio_region_start;
 
-    /* s390x ram size detection needs a 16bit multiplier + an increment. So
-       guests > 64GB can be specified in 2MB steps etc. */
-    while ((my_ram_size >> (20 + shift)) > 65535) {
-        shift++;
+    /*
+     * The storage increment size is a multiple of 1M and is a power of 2.
+     * The number of storage increments must be MAX_STORAGE_INCREMENTS or
+     * fewer.
+     */
+    while ((my_ram_size >> increment_size) > MAX_STORAGE_INCREMENTS) {
+        increment_size++;
     }
-    my_ram_size = my_ram_size >> (20 + shift) << (20 + shift);
+    my_ram_size = my_ram_size >> increment_size << increment_size;
 
     /* let's propagate the changed ram size into the global variable. */
     ram_size = my_ram_size;
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index d8ddf35e58..02b3275132 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -16,7 +16,8 @@
 #include "sysemu/kvm.h"
 #include "exec/memory.h"
 #include "sysemu/sysemu.h"
-
+#include "exec/address-spaces.h"
+#include "qemu/config-file.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/event-facility.h"
 
@@ -33,10 +34,19 @@ static inline SCLPEventFacility *get_event_facility(void)
 static void read_SCP_info(SCCB *sccb)
 {
     ReadInfo *read_info = (ReadInfo *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
     CPUState *cpu;
-    int shift = 0;
     int cpu_count = 0;
     int i = 0;
+    int increment_size = 20;
+    int rnsize, rnmax;
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL);
+    int slots = qemu_opt_get_number(opts, "slots", 0);
+    int max_avail_slots = s390_get_memslot_count(kvm_state);
+
+    if (slots > max_avail_slots) {
+        slots = max_avail_slots;
+    }
 
     CPU_FOREACH(cpu) {
         cpu_count++;
@@ -54,14 +64,235 @@ static void read_SCP_info(SCCB *sccb)
 
     read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO);
 
-    while ((ram_size >> (20 + shift)) > 65535) {
-        shift++;
+    /*
+     * The storage increment size is a multiple of 1M and is a power of 2.
+     * The number of storage increments must be MAX_STORAGE_INCREMENTS or fewer.
+     */
+    while ((ram_size >> increment_size) > MAX_STORAGE_INCREMENTS) {
+        increment_size++;
+    }
+    rnmax = ram_size >> increment_size;
+
+    /* Memory Hotplug is only supported for the ccw machine type */
+    if (mhd) {
+        while ((mhd->standby_mem_size >> increment_size) >
+               MAX_STORAGE_INCREMENTS) {
+            increment_size++;
+        }
+        assert(increment_size == mhd->increment_size);
+
+        mhd->standby_subregion_size = MEM_SECTION_SIZE;
+        /* Deduct the memory slot already used for core */
+        if (slots > 0) {
+            while ((mhd->standby_subregion_size * (slots - 1)
+                    < mhd->standby_mem_size)) {
+                mhd->standby_subregion_size = mhd->standby_subregion_size << 1;
+            }
+        }
+        /*
+         * Initialize mapping of guest standby memory sections indicating which
+         * are and are not online. Assume all standby memory begins offline.
+         */
+        if (mhd->standby_state_map == 0) {
+            if (mhd->standby_mem_size % mhd->standby_subregion_size) {
+                mhd->standby_state_map = g_malloc0((mhd->standby_mem_size /
+                                             mhd->standby_subregion_size + 1) *
+                                             (mhd->standby_subregion_size /
+                                             MEM_SECTION_SIZE));
+            } else {
+                mhd->standby_state_map = g_malloc0(mhd->standby_mem_size /
+                                                   MEM_SECTION_SIZE);
+            }
+        }
+        mhd->padded_ram_size = ram_size + mhd->pad_size;
+        mhd->rzm = 1 << mhd->increment_size;
+        rnmax = ((ram_size + mhd->standby_mem_size + mhd->pad_size)
+             >> mhd->increment_size);
+
+        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);
+    }
+
+    rnsize = 1 << (increment_size - 20);
+    if (rnsize <= 128) {
+        read_info->rnsize = rnsize;
+    } else {
+        read_info->rnsize = 0;
+        read_info->rnsize2 = cpu_to_be32(rnsize);
+    }
+
+    if (rnmax < 0x10000) {
+        read_info->rnmax = cpu_to_be16(rnmax);
+    } else {
+        read_info->rnmax = cpu_to_be16(0);
+        read_info->rnmax2 = cpu_to_be64(rnmax);
     }
-    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
-    read_info->rnsize = 1 << shift;
+
     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
 }
 
+static void read_storage_element0_info(SCCB *sccb)
+{
+    int i, assigned;
+    int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID;
+    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+
+    assert(mhd);
+
+    if ((ram_size >> mhd->increment_size) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    /* Return information regarding core memory */
+    storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
+    assigned = ram_size >> mhd->increment_size;
+    storage_info->assigned = cpu_to_be16(assigned);
+
+    for (i = 0; i < assigned; i++) {
+        storage_info->entries[i] = cpu_to_be32(subincrement_id);
+        subincrement_id += SCLP_INCREMENT_UNIT;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
+}
+
+static void read_storage_element1_info(SCCB *sccb)
+{
+    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+
+    assert(mhd);
+
+    if ((mhd->standby_mem_size >> mhd->increment_size) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    /* Return information regarding standby memory */
+    storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
+    storage_info->assigned = cpu_to_be16(mhd->standby_mem_size >>
+                                         mhd->increment_size);
+    storage_info->standby = cpu_to_be16(mhd->standby_mem_size >>
+                                        mhd->increment_size);
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
+}
+
+static void attach_storage_element(SCCB *sccb, uint16_t element)
+{
+    int i, assigned, subincrement_id;
+    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+
+    assert(mhd);
+
+    if (element != 1) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        return;
+    }
+
+    assigned = mhd->standby_mem_size >> mhd->increment_size;
+    attach_info->assigned = cpu_to_be16(assigned);
+    subincrement_id = ((ram_size >> mhd->increment_size) << 16)
+                      + SCLP_STARTING_SUBINCREMENT_ID;
+    for (i = 0; i < assigned; i++) {
+        attach_info->entries[i] = cpu_to_be32(subincrement_id);
+        subincrement_id += SCLP_INCREMENT_UNIT;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+static void assign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    uint64_t this_subregion_size;
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    assert(mhd);
+    ram_addr_t assign_addr = (assign_info->rn - 1) * mhd->rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
+        (assign_addr >= mhd->padded_ram_size)) {
+        /* Re-use existing memory region if found */
+        mr = memory_region_find(sysmem, assign_addr, 1).mr;
+        if (!mr) {
+
+            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
+
+            /* offset to align to standby_subregion_size for allocation */
+            ram_addr_t offset = assign_addr -
+                                (assign_addr - mhd->padded_ram_size)
+                                % mhd->standby_subregion_size;
+
+            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
+            char id[16];
+            snprintf(id, 16, "standby.ram%d",
+                     (int)((offset - mhd->padded_ram_size) /
+                     mhd->standby_subregion_size) + 1);
+
+            /* Allocate a subregion of the calculated standby_subregion_size */
+            if (offset + mhd->standby_subregion_size >
+                mhd->padded_ram_size + mhd->standby_mem_size) {
+                this_subregion_size = mhd->padded_ram_size +
+                  mhd->standby_mem_size - offset;
+            } else {
+                this_subregion_size = mhd->standby_subregion_size;
+            }
+
+            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
+            vmstate_register_ram_global(standby_ram);
+            memory_region_add_subregion(sysmem, offset, standby_ram);
+        }
+        /* The specified subregion is no longer in standby */
+        mhd->standby_state_map[(assign_addr - mhd->padded_ram_size)
+                               / MEM_SECTION_SIZE] = 1;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+static void unassign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    assert(mhd);
+    ram_addr_t unassign_addr = (assign_info->rn - 1) * mhd->rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    /* if the addr is a multiple of 256 MB */
+    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
+        (unassign_addr >= mhd->padded_ram_size)) {
+        mhd->standby_state_map[(unassign_addr -
+                           mhd->padded_ram_size) / MEM_SECTION_SIZE] = 0;
+
+        /* find the specified memory region and destroy it */
+        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
+        if (mr) {
+            int i;
+            int is_removable = 1;
+            ram_addr_t map_offset = (unassign_addr - mhd->padded_ram_size -
+                                     (unassign_addr - mhd->padded_ram_size)
+                                     % mhd->standby_subregion_size);
+            /* Mark all affected subregions as 'standby' once again */
+            for (i = 0;
+                 i < (mhd->standby_subregion_size / MEM_SECTION_SIZE);
+                 i++) {
+
+                if (mhd->standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
+                    is_removable = 0;
+                    break;
+                }
+            }
+            if (is_removable) {
+                memory_region_del_subregion(sysmem, mr);
+                object_unparent(OBJECT(mr));
+                g_free(mr);
+            }
+        }
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
 /* Provide information about the CPU */
 static void sclp_read_cpu_info(SCCB *sccb)
 {
@@ -103,6 +334,22 @@ static void sclp_execute(SCCB *sccb, uint32_t code)
     case SCLP_CMDW_READ_CPU_INFO:
         sclp_read_cpu_info(sccb);
         break;
+    case SCLP_READ_STORAGE_ELEMENT_INFO:
+        if (code & 0xff00) {
+            read_storage_element1_info(sccb);
+        } else {
+            read_storage_element0_info(sccb);
+        }
+        break;
+    case SCLP_ATTACH_STORAGE_ELEMENT:
+        attach_storage_element(sccb, (code & 0xff00) >> 8);
+        break;
+    case SCLP_ASSIGN_STORAGE:
+        assign_storage(sccb);
+        break;
+    case SCLP_UNASSIGN_STORAGE:
+        unassign_storage(sccb);
+        break;
     default:
         efc->command_handler(ef, sccb, code);
         break;
@@ -183,3 +430,33 @@ void s390_sclp_init(void)
                               OBJECT(dev), NULL);
     qdev_init_nofail(dev);
 }
+
+sclpMemoryHotplugDev *init_sclp_memory_hotplug_dev(void)
+{
+    DeviceState *dev;
+    dev = qdev_create(NULL, TYPE_SCLP_MEMORY_HOTPLUG_DEV);
+    object_property_add_child(qdev_get_machine(),
+                              TYPE_SCLP_MEMORY_HOTPLUG_DEV,
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+    return SCLP_MEMORY_HOTPLUG_DEV(object_resolve_path(
+                                   TYPE_SCLP_MEMORY_HOTPLUG_DEV, NULL));
+}
+
+sclpMemoryHotplugDev *get_sclp_memory_hotplug_dev(void)
+{
+    return SCLP_MEMORY_HOTPLUG_DEV(object_resolve_path(
+                                   TYPE_SCLP_MEMORY_HOTPLUG_DEV, NULL));
+}
+
+static TypeInfo sclp_memory_hotplug_dev_info = {
+    .name = TYPE_SCLP_MEMORY_HOTPLUG_DEV,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(sclpMemoryHotplugDev),
+};
+
+static void register_types(void)
+{
+    type_register_static(&sclp_memory_hotplug_dev_info);
+}
+type_init(register_types);
author	Peter Maydell <peter.maydell@linaro.org>	2014-09-01 13:57:45 +0100
committer	Peter Maydell <peter.maydell@linaro.org>	2014-09-01 13:57:46 +0100
commit	5cd1475d28fc6e3d617e6cc47ab7c8050cf7fa40 (patch)
tree	99e70b5281c462f367a79b6c4520b00602ce0f5a /hw
parent	988f46361465db0d4fce50e71fa0ff8f9d20483e (diff)
parent	218829db2303e3d61f901f1d12fd4f7cd03644e1 (diff)