diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2018-01-24 15:28:36 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2018-01-24 15:28:36 +0000 |
commit | 25bfd5a75fa3e8f5796656c7634e26193f7bedc1 (patch) | |
tree | e8c02a7393555643add8e3b2f4be6b8a015c7ca8 | |
parent | 238e2d93c9ddc9bc6b5392289bed38a4ebff004d (diff) | |
parent | bcbb3866da19cce4360c828b6ec1c2a137757927 (diff) |
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
Pull request
v2:
* Drop merge failure from a previous pull request that broke virtio-blk on ARM
guests
* Add Parallels XML patch series
# gpg: Signature made Mon 22 Jan 2018 16:00:40 GMT
# gpg: using RSA key 0x9CA4ABB381AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
# Primary key fingerprint: 8695 A8BF D3F9 7CDA AC35 775A 9CA4 ABB3 81AB 73C8
* remotes/stefanha/tags/block-pull-request:
block/parallels: add backing support to readv/writev
block/parallels: replace some magic numbers
block/parallels: move some structures into header
configure: add dependency
docs/interop/prl-xml: description of Parallels Disk format
block: add block_set_io_throttle virtio-blk-pci QMP example
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r-- | block/Makefile.objs | 2 | ||||
-rw-r--r-- | block/parallels.c | 108 | ||||
-rw-r--r-- | block/parallels.h | 88 | ||||
-rwxr-xr-x | configure | 27 | ||||
-rw-r--r-- | docs/interop/prl-xml.txt | 158 | ||||
-rw-r--r-- | qapi/block-core.json | 18 | ||||
-rwxr-xr-x | scripts/checkpatch.pl | 1 |
7 files changed, 342 insertions, 60 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index 6eaf78a046..a73387f1bf 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -47,3 +47,5 @@ block-obj-$(if $(CONFIG_BZIP2),m,n) += dmg-bz2.o dmg-bz2.o-libs := $(BZIP2_LIBS) qcow.o-libs := -lz linux-aio.o-libs := -laio +parallels.o-cflags := $(LIBXML2_CFLAGS) +parallels.o-libs := $(LIBXML2_LIBS) diff --git a/block/parallels.c b/block/parallels.c index 9545761f49..d3802085e3 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -36,6 +36,7 @@ #include "qemu/bswap.h" #include "qemu/bitmap.h" #include "migration/blocker.h" +#include "parallels.h" /**************************************************************/ @@ -45,30 +46,6 @@ #define HEADER_INUSE_MAGIC (0x746F6E59) #define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32) -#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ - - -// always little-endian -typedef struct ParallelsHeader { - char magic[16]; // "WithoutFreeSpace" - uint32_t version; - uint32_t heads; - uint32_t cylinders; - uint32_t tracks; - uint32_t bat_entries; - uint64_t nb_sectors; - uint32_t inuse; - uint32_t data_off; - char padding[12]; -} QEMU_PACKED ParallelsHeader; - - -typedef enum ParallelsPreallocMode { - PRL_PREALLOC_MODE_FALLOCATE = 0, - PRL_PREALLOC_MODE_TRUNCATE = 1, - PRL_PREALLOC_MODE__MAX = 2, -} ParallelsPreallocMode; - static QEnumLookup prealloc_mode_lookup = { .array = (const char *const[]) { "falloc", @@ -77,34 +54,6 @@ static QEnumLookup prealloc_mode_lookup = { .size = PRL_PREALLOC_MODE__MAX }; -typedef struct BDRVParallelsState { - /** Locking is conservative, the lock protects - * - image file extending (truncate, fallocate) - * - any access to block allocation table - */ - CoMutex lock; - - ParallelsHeader *header; - uint32_t header_size; - bool header_unclean; - - unsigned long *bat_dirty_bmap; - unsigned int bat_dirty_block; - - uint32_t *bat_bitmap; - unsigned int bat_size; - - int64_t data_end; - uint64_t prealloc_size; - ParallelsPreallocMode prealloc_mode; - - unsigned int tracks; - - unsigned int off_multiplier; - Error *migration_blocker; -} BDRVParallelsState; - - #define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode" #define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size" @@ -193,6 +142,7 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { + int ret; BDRVParallelsState *s = bs->opaque; int64_t pos, space, idx, to_allocate, i, len; @@ -221,7 +171,6 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, return len; } if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { - int ret; space += s->prealloc_size; if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { ret = bdrv_pwrite_zeroes(bs->file, @@ -237,6 +186,37 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } + /* Try to read from backing to fill empty clusters + * FIXME: 1. previous write_zeroes may be redundant + * 2. most of data we read from backing will be rewritten by + * parallels_co_writev. On aligned-to-cluster write we do not need + * this read at all. + * 3. it would be good to combine write of data from backing and new + * data into one write call */ + if (bs->backing) { + int64_t nb_cow_sectors = to_allocate * s->tracks; + int64_t nb_cow_bytes = nb_cow_sectors << BDRV_SECTOR_BITS; + QEMUIOVector qiov; + struct iovec iov = { + .iov_len = nb_cow_bytes, + .iov_base = qemu_blockalign(bs, nb_cow_bytes) + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + ret = bdrv_co_readv(bs->backing, idx * s->tracks, nb_cow_sectors, + &qiov); + if (ret < 0) { + qemu_vfree(iov.iov_base); + return ret; + } + + ret = bdrv_co_writev(bs->file, s->data_end, nb_cow_sectors, &qiov); + qemu_vfree(iov.iov_base); + if (ret < 0) { + return ret; + } + } + for (i = 0; i < to_allocate; i++) { s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); s->data_end += s->tracks; @@ -360,12 +340,19 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, nbytes = n << BDRV_SECTOR_BITS; + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + if (position < 0) { - qemu_iovec_memset(qiov, bytes_done, 0, nbytes); + if (bs->backing) { + ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov); + if (ret < 0) { + break; + } + } else { + qemu_iovec_memset(&hd_qiov, 0, 0, nbytes); + } } else { - qemu_iovec_reset(&hd_qiov); - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); - ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); if (ret < 0) { break; @@ -527,8 +514,9 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); header.version = cpu_to_le32(HEADER_VERSION); /* don't care much about geometry, it is not used on image level */ - header.heads = cpu_to_le32(16); - header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32); + header.heads = cpu_to_le32(HEADS_NUMBER); + header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE + / HEADS_NUMBER / SEC_IN_CYL); header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); header.bat_entries = cpu_to_le32(bat_entries); header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); @@ -798,7 +786,7 @@ static BlockDriver bdrv_parallels = { .bdrv_co_flush_to_os = parallels_co_flush_to_os, .bdrv_co_readv = parallels_co_readv, .bdrv_co_writev = parallels_co_writev, - + .supports_backing = true, .bdrv_create = parallels_create, .bdrv_check = parallels_check, .create_opts = ¶llels_create_opts, diff --git a/block/parallels.h b/block/parallels.h new file mode 100644 index 0000000000..4b044079ef --- /dev/null +++ b/block/parallels.h @@ -0,0 +1,88 @@ +/* +* Block driver for Parallels disk image format +* +* Copyright (c) 2015-2017 Virtuozzo, Inc. +* Authors: +* 2016-2017 Klim S. Kireev <klim.kireev@virtuozzo.com> +* 2015 Denis V. Lunev <den@openvz.org> +* +* This code was originally based on comparing different disk images created +* by Parallels. Currently it is based on opened OpenVZ sources +* available at +* https://github.com/OpenVZ/ploop +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ +#ifndef BLOCK_PARALLELS_H +#define BLOCK_PARALLELS_H +#include "qemu/coroutine.h" +#include "qemu/typedefs.h" + +#define HEADS_NUMBER 16 +#define SEC_IN_CYL 32 +#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ + +/* always little-endian */ +typedef struct ParallelsHeader { + char magic[16]; /* "WithoutFreeSpace" */ + uint32_t version; + uint32_t heads; + uint32_t cylinders; + uint32_t tracks; + uint32_t bat_entries; + uint64_t nb_sectors; + uint32_t inuse; + uint32_t data_off; + char padding[12]; +} QEMU_PACKED ParallelsHeader; + +typedef enum ParallelsPreallocMode { + PRL_PREALLOC_MODE_FALLOCATE = 0, + PRL_PREALLOC_MODE_TRUNCATE = 1, + PRL_PREALLOC_MODE__MAX = 2, +} ParallelsPreallocMode; + +typedef struct BDRVParallelsState { + /** Locking is conservative, the lock protects + * - image file extending (truncate, fallocate) + * - any access to block allocation table + */ + CoMutex lock; + + ParallelsHeader *header; + uint32_t header_size; + bool header_unclean; + + unsigned long *bat_dirty_bmap; + unsigned int bat_dirty_block; + + uint32_t *bat_bitmap; + unsigned int bat_size; + + int64_t data_end; + uint64_t prealloc_size; + ParallelsPreallocMode prealloc_mode; + + unsigned int tracks; + + unsigned int off_multiplier; + Error *migration_blocker; +} BDRVParallelsState; + +#endif @@ -435,6 +435,7 @@ tcmalloc="no" jemalloc="no" replication="yes" vxhs="" +libxml2="" supported_cpu="no" supported_os="no" @@ -1298,6 +1299,10 @@ for opt do ;; --enable-numa) numa="yes" ;; + --disable-libxml2) libxml2="no" + ;; + --enable-libxml2) libxml2="yes" + ;; --disable-tcmalloc) tcmalloc="no" ;; --enable-tcmalloc) tcmalloc="yes" @@ -1573,6 +1578,7 @@ disabled with --disable-FEATURE, default is enabled if available: tpm TPM support libssh2 ssh block device support numa libnuma support + libxml2 for Parallels image format tcmalloc tcmalloc support jemalloc jemalloc support replication replication support @@ -3748,6 +3754,20 @@ EOF fi fi +########################################## +# libxml2 probe +if test "$libxml2" != "no" ; then + if $pkg_config --exists libxml-2.0; then + libxml2="yes" + libxml2_cflags=$($pkg_config --cflags libxml-2.0) + libxml2_libs=$($pkg_config --libs libxml-2.0) + else + if test "$libxml2" = "yes"; then + feature_not_found "libxml2" "Install libxml2 devel" + fi + libxml2="no" + fi +fi ########################################## # glusterfs probe @@ -5630,6 +5650,7 @@ echo "lzo support $lzo" echo "snappy support $snappy" echo "bzip2 support $bzip2" echo "NUMA host support $numa" +echo "libxml2 $libxml2" echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" echo "avx2 optimization $avx2_opt" @@ -6299,6 +6320,12 @@ if test "$have_rtnetlink" = "yes" ; then echo "CONFIG_RTNETLINK=y" >> $config_host_mak fi +if test "$libxml2" = "yes" ; then + echo "CONFIG_LIBXML2=y" >> $config_host_mak + echo "LIBXML2_CFLAGS=$libxml2_cflags" >> $config_host_mak + echo "LIBXML2_LIBS=$libxml2_libs" >> $config_host_mak +fi + if test "$replication" = "yes" ; then echo "CONFIG_REPLICATION=y" >> $config_host_mak fi diff --git a/docs/interop/prl-xml.txt b/docs/interop/prl-xml.txt new file mode 100644 index 0000000000..7031f8752c --- /dev/null +++ b/docs/interop/prl-xml.txt @@ -0,0 +1,158 @@ += License = + +Copyright (c) 2015-2017, Virtuozzo, Inc. +Authors: + 2015 Denis Lunev <den@openvz.org> + 2015 Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> + 2016-2017 Klim Kireev <klim.kireev@virtuozzo.com> + 2016-2017 Edgar Kaziakhmedov <edgar.kaziakhmedov@virtuozzo.com> + +This work is licensed under the terms of the GNU GPL, version 2 or later. +See the COPYING file in the top-level directory. + +This specification contains minimal information about Parallels Disk Format, +which is enough to proper work with QEMU. Nevertheless, Parallels Cloud Server +and Parallels Desktop are able to add some unspecified nodes to xml and use +them, but they are for internal work and don't affect functionality. Also it +uses auxiliary xml "Snapshot.xml", which allows to store optional snapshot +information, but it doesn't influence open/read/write functionality. QEMU and +other software should not use fields not covered in this document and +Snapshot.xml file and must leave them as is. + += Parallels Disk Format = + +Parallels disk consists of two parts: the set of snapshots and the disk +descriptor file, which stores information about all files and snapshots. + +== Definitions == + Snapshot a record of the contents captured at a particular time, + capable of storing current state. A snapshot has UUID and + parent UUID. + + Snapshot image an overlay representing the difference between this + snapshot and some earlier snapshot. + + Overlay an image storing the different sectors between two captured + states. + + Root image snapshot image with no parent, the root of snapshot tree. + + Storage the backing storage for a subset of the virtual disk. When + there is more than one storage in a Parallels disk then that + is referred to as a split image. In this case every storage + covers specific address space area of the disk and has its + particular root image. Split images are not considered here + and are not supported. Each storage consists of disk + parameters and a list of images. The list of images always + contains a root image and may also contain overlays. The + root image can be an expandable Parallels image file or + plain. Overlays must be expandable. + + Description DiskDescriptor.xml stores information about disk parameters, + file snapshots, storages. + + Top The overlay between actual state and some previous snapshot. + Snapshot It is not a snapshot in the classical sense because it + serves as the active image that the guest writes to. + + Sector a 512-byte data chunk. + +== Description file == +All information is placed in a single XML element Parallels_disk_image. +The element has only one attribute "Version", that must be 1.0. +Schema of DiskDescriptor.xml: + +<Parallels_disk_image Version="1.0"> + <Disk_Parameters> + ... + </Disk_Parameters> + <StorageData> + ... + </StorageData> + <Snapshots> + ... + </Snapshots> +</Parallels_disk_image> + +== Disk_Parameters element == +The Disk_Parameters element describes the physical layout of the virtual disk +and some general settings. + +The Disk_Parameters element MUST contain the following child elements: + * Disk_size - number of sectors in the disk, + desired size of the disk. + * Cylinders - number of the disk cylinders. + * Heads - number of the disk heads. + * Sectors - number of the disk sectors per cylinder + (sector size is 512 bytes) + Limitation: Product of the Heads, Sectors and Cylinders + values MUST be equal to the value of the Disk_size parameter. + * Padding - must be 0. Parallels Cloud Server and Parallels Desktop may + use padding set to 1, however this case is not covered + by this spec, QEMU and other software should not open + such disks and should not create them. + +== StorageData element == +This element of the file describes the root image and all snapshot images. + +The StorageData element consists of the Storage child element, as shown below: +<StorageData> + <Storage> + ... + </Storage> +</StorageData> + +A Storage element has following child elements: + * Start - start sector of the storage, in case of non split storage + equals to 0. + * End - number of sector following the last sector, in case of non + split storage equals to Disk_size. + * Blocksize - storage cluster size, number of sectors per one cluster. + Cluster size for each "Compressed" (see below) image in + parallels disk must be equal to this field. Note: cluster + size for Parallels Expandable Image is in 'tracks' field of + its header (see docs/interop/parallels.txt). + * Several Image child elements. + +Each Image element has following child elements: + * GUID - image identifier, UUID in curly brackets. + For instance, {12345678-9abc-def1-2345-6789abcdef12}. + The GUID is used by the Snapshots element to reference images + (see below) + * Type - image type of the element. It can be: + "Plain" for raw files. + "Compressed" for expanding disks. + * File - path to image file. Path can be relative to DiskDecriptor.xml or + absolute. + +== Snapshots element == +The Snapshots element describes the snapshot relations with the snapshot tree. + +The element contains the set of Shot child elements, as shown below: +<Snapshots> + <TopGUID> ... </TopGUID> /* Optional child element */ + <Shot> + ... + </Shot> + <Shot> + ... + </Shot> + ... +</Snapshots> + +Each Shot element contains the following child elements: + * GUID - an image GUID. + * ParentGUID - GUID of the image of the parent snapshot. + +The software may traverse snapshots from child to parent using <ParentGUID> +field as reference. ParentGUID of root snapshot is +{00000000-0000-0000-0000-000000000000}. There should be only one root +snapshot. Top snapshot could be described via two ways: via TopGUID child +element of the Snapshots element or via predefined GUID +{5fbaabe3-6958-40ff-92a7-860e329aab41}. If TopGUID is defined, predefined GUID is +interpreted as usual GUID. All snapshot images (except Top Snapshot) should be +opened read-only. There is another predefined GUID, +BackupID = {704718e1-2314-44c8-9087-d78ed36b0f4e}, which is used by original and +some third-party software for backup, QEMU and other software may operate with +images with GUID = BackupID as usual, however, it is not recommended to use this +GUID for new disks. Top snapshot cannot have this GUID. diff --git a/qapi/block-core.json b/qapi/block-core.json index e94a6881b2..4e84cf29db 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1799,6 +1799,24 @@ # Example: # # -> { "execute": "block_set_io_throttle", +# "arguments": { "id": "virtio-blk-pci0/virtio-backend", +# "bps": 0, +# "bps_rd": 0, +# "bps_wr": 0, +# "iops": 512, +# "iops_rd": 0, +# "iops_wr": 0, +# "bps_max": 0, +# "bps_rd_max": 0, +# "bps_wr_max": 0, +# "iops_max": 0, +# "iops_rd_max": 0, +# "iops_wr_max": 0, +# "bps_max_length": 0, +# "iops_size": 0 } } +# <- { "return": {} } +# +# -> { "execute": "block_set_io_throttle", # "arguments": { "id": "ide0-1-0", # "bps": 1000000, # "bps_rd": 0, diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index accba24a31..1b4b812e28 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -265,6 +265,7 @@ our @typeList = ( qr{${Ident}_handler_fn}, qr{target_(?:u)?long}, qr{hwaddr}, + qr{xml${Ident}}, ); # This can be modified by sub possible. Since it can be empty, be careful |