diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-05-22 13:25:40 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-05-22 13:25:40 +0100 |
commit | 8b6db32a4ec47d1171ccfa21d557096b99f4eef0 (patch) | |
tree | eca89cb63571cd7c7c5806d8007928fdda63e9da /block | |
parent | f5790c3bc81702c98c7ddadedb274758cff8cbe7 (diff) | |
parent | a53f1a95f9605f300fbafbc8b60b8a8c67e9c4b4 (diff) |
Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
# gpg: Signature made Fri May 22 10:00:53 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg: aka "Stefan Hajnoczi <stefanha@gmail.com>"
* remotes/stefanha/tags/block-pull-request: (38 commits)
block: get_block_status: use "else" when testing the opposite condition
qemu-iotests: Test unaligned sub-block zero write
block: Fix NULL deference for unaligned write if qiov is NULL
Revert "block: Fix unaligned zero write"
block: align bounce buffers to page
block: minimal bounce buffer alignment
block: return EPERM on writes or discards to read-only devices
configure: Add workaround for ccache and clang
configure: silence glib unknown attribute __alloc_size__
configure: factor out supported flag check
configure: handle clang -nopie argument warning
block/parallels: improve image writing performance further
block/parallels: optimize linear image expansion
block/parallels: add prealloc-mode and prealloc-size open paramemets
block/parallels: delay writing to BAT till bdrv_co_flush_to_os
block/parallels: create bat_entry_off helper
block/parallels: improve image reading performance
iotests, parallels: check for incorrectly closed image in tests
block/parallels: implement incorrect close detection
block/parallels: implement parallels_check method of block driver
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'block')
-rw-r--r-- | block/io.c | 159 | ||||
-rw-r--r-- | block/parallels.c | 679 | ||||
-rw-r--r-- | block/raw-posix.c | 14 |
3 files changed, 731 insertions, 121 deletions
diff --git a/block/io.c b/block/io.c index 1ce62c4fbc..e394d92626 100644 --- a/block/io.c +++ b/block/io.c @@ -201,9 +201,11 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) } bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; + bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment; bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { - bs->bl.opt_mem_alignment = 512; + bs->bl.min_mem_alignment = 512; + bs->bl.opt_mem_alignment = getpagesize(); } if (bs->backing_hd) { @@ -221,6 +223,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = MAX(bs->bl.opt_mem_alignment, bs->backing_hd->bl.opt_mem_alignment); + bs->bl.min_mem_alignment = + MAX(bs->bl.min_mem_alignment, + bs->backing_hd->bl.min_mem_alignment); } /* Then let the driver override it */ @@ -929,19 +934,6 @@ out: return ret; } -static inline uint64_t bdrv_get_align(BlockDriverState *bs) -{ - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); -} - -static inline bool bdrv_req_is_aligned(BlockDriverState *bs, - int64_t offset, size_t bytes) -{ - int64_t align = bdrv_get_align(bs); - return !(offset & (align - 1) || (bytes & (align - 1))); -} - /* * Handle a read request in coroutine context */ @@ -952,7 +944,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - uint64_t align = bdrv_get_align(bs); + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1186,6 +1179,94 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } +static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, + int64_t offset, + unsigned int bytes, + BdrvRequestFlags flags, + BdrvTrackedRequest *req) +{ + uint8_t *buf = NULL; + QEMUIOVector local_qiov; + struct iovec iov; + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + unsigned int head_padding_bytes, tail_padding_bytes; + int ret = 0; + + head_padding_bytes = offset & (align - 1); + tail_padding_bytes = align - ((offset + bytes) & (align - 1)); + + + assert(flags & BDRV_REQ_ZERO_WRITE); + if (head_padding_bytes || tail_padding_bytes) { + buf = qemu_blockalign(bs, align); + iov = (struct iovec) { + .iov_base = buf, + .iov_len = align, + }; + qemu_iovec_init_external(&local_qiov, &iov, 1); + } + if (head_padding_bytes) { + uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); + + /* RMW the unaligned part before head. */ + mark_request_serialising(req, align); + wait_serialising_requests(req); + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + align, &local_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + + memset(buf + head_padding_bytes, 0, zero_bytes); + ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + &local_qiov, + flags & ~BDRV_REQ_ZERO_WRITE); + if (ret < 0) { + goto fail; + } + offset += zero_bytes; + bytes -= zero_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes >= align) { + /* Write the aligned part in the middle. */ + uint64_t aligned_bytes = bytes & ~(align - 1); + ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, + NULL, flags); + if (ret < 0) { + goto fail; + } + bytes -= aligned_bytes; + offset += aligned_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes) { + assert(align == tail_padding_bytes + bytes); + /* RMW the unaligned part after tail. */ + mark_request_serialising(req, align); + wait_serialising_requests(req); + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv(bs, req, offset, align, + align, &local_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + + memset(buf, 0, bytes); + ret = bdrv_aligned_pwritev(bs, req, offset, align, + &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + } +fail: + qemu_vfree(buf); + return ret; + +} + /* * Handle a write request in coroutine context */ @@ -1194,7 +1275,8 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, BdrvRequestFlags flags) { BdrvTrackedRequest req; - uint64_t align = bdrv_get_align(bs); + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1205,7 +1287,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, return -ENOMEDIUM; } if (bs->read_only) { - return -EACCES; + return -EPERM; } ret = bdrv_check_byte_request(bs, offset, bytes); @@ -1225,6 +1307,11 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, */ tracked_request_begin(&req, bs, offset, bytes, true); + if (!qiov) { + ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + goto out; + } + if (offset & (align - 1)) { QEMUIOVector head_qiov; struct iovec head_iov; @@ -1293,23 +1380,19 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, bytes = ROUND_UP(bytes, align); } - if (use_local_qiov) { - /* Local buffer may have non-zero data. */ - flags &= ~BDRV_REQ_ZERO_WRITE; - } ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, use_local_qiov ? &local_qiov : qiov, flags); fail: - tracked_request_end(&req); if (use_local_qiov) { qemu_iovec_destroy(&local_qiov); } qemu_vfree(head_buf); qemu_vfree(tail_buf); - +out: + tracked_request_end(&req); return ret; } @@ -1337,32 +1420,14 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { - int ret; - trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); if (!(bs->open_flags & BDRV_O_UNMAP)) { flags &= ~BDRV_REQ_MAY_UNMAP; } - if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS)) { - ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE | flags); - } else { - uint8_t *buf; - QEMUIOVector local_qiov; - size_t bytes = nb_sectors << BDRV_SECTOR_BITS; - - buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); - memset(buf, 0, bytes); - qemu_iovec_init(&local_qiov, 1); - qemu_iovec_add(&local_qiov, buf, bytes); - ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, - BDRV_REQ_ZERO_WRITE | flags); - qemu_vfree(buf); - } - return ret; + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, + BDRV_REQ_ZERO_WRITE | flags); } int bdrv_flush_all(void) @@ -1456,9 +1521,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { ret |= BDRV_BLOCK_ALLOCATED; - } - - if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { + } else { if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; } else if (bs->backing_hd) { @@ -2340,7 +2403,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } else if (bs->read_only) { - return -EROFS; + return -EPERM; } bdrv_reset_dirty(bs, sector_num, nb_sectors); @@ -2489,7 +2552,7 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; - size_t alignment = bdrv_opt_mem_align(bs); + size_t alignment = bdrv_min_mem_align(bs); for (i = 0; i < qiov->niov; i++) { if ((uintptr_t) qiov->iov[i].iov_base % alignment) { diff --git a/block/parallels.c b/block/parallels.c index 4f9cd8dd23..046b56844c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -2,8 +2,12 @@ * Block driver for Parallels disk image format * * Copyright (c) 2007 Alex Beregszaszi + * Copyright (c) 2015 Denis V. Lunev <den@openvz.org> * - * This code is based on comparing different disk images created by Parallels. + * This code was originally based on comparing different disk images created + * by Parallels. Currently it is based on opened OpenVZ sources + * available at + * http://git.openvz.org/?p=ploop;a=summary * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,63 +30,539 @@ #include "qemu-common.h" #include "block/block_int.h" #include "qemu/module.h" +#include "qemu/bitmap.h" +#include "qapi/util.h" /**************************************************************/ #define HEADER_MAGIC "WithoutFreeSpace" #define HEADER_MAGIC2 "WithouFreSpacExt" #define HEADER_VERSION 2 -#define HEADER_SIZE 64 +#define HEADER_INUSE_MAGIC (0x746F6E59) + +#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ + // always little-endian -struct parallels_header { +typedef struct ParallelsHeader { char magic[16]; // "WithoutFreeSpace" uint32_t version; uint32_t heads; uint32_t cylinders; uint32_t tracks; - uint32_t catalog_entries; + uint32_t bat_entries; uint64_t nb_sectors; uint32_t inuse; uint32_t data_off; char padding[12]; -} QEMU_PACKED; +} QEMU_PACKED ParallelsHeader; + + +typedef enum ParallelsPreallocMode { + PRL_PREALLOC_MODE_FALLOCATE = 0, + PRL_PREALLOC_MODE_TRUNCATE = 1, + PRL_PREALLOC_MODE_MAX = 2, +} ParallelsPreallocMode; + +static const char *prealloc_mode_lookup[] = { + "falloc", + "truncate", + NULL, +}; + typedef struct BDRVParallelsState { + /** Locking is conservative, the lock protects + * - image file extending (truncate, fallocate) + * - any access to block allocation table + */ CoMutex lock; - uint32_t *catalog_bitmap; - unsigned int catalog_size; + ParallelsHeader *header; + uint32_t header_size; + bool header_unclean; + + unsigned long *bat_dirty_bmap; + unsigned int bat_dirty_block; + + uint32_t *bat_bitmap; + unsigned int bat_size; + + int64_t data_end; + uint64_t prealloc_size; + ParallelsPreallocMode prealloc_mode; unsigned int tracks; unsigned int off_multiplier; } BDRVParallelsState; -static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) + +#define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode" +#define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size" + +static QemuOptsList parallels_runtime_opts = { + .name = "parallels", + .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head), + .desc = { + { + .name = PARALLELS_OPT_PREALLOC_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Preallocation size on image expansion", + .def_value_str = "128MiB", + }, + { + .name = PARALLELS_OPT_PREALLOC_MODE, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode on image expansion " + "(allowed values: falloc, truncate)", + .def_value_str = "falloc", + }, + { /* end of list */ }, + }, +}; + + +static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) +{ + return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; +} + +static uint32_t bat_entry_off(uint32_t idx) +{ + return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx; +} + +static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) +{ + uint32_t index, offset; + + index = sector_num / s->tracks; + offset = sector_num % s->tracks; + + /* not allocated */ + if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) { + return -1; + } + return bat2sect(s, index) + offset; +} + +static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors) +{ + int ret = s->tracks - sector_num % s->tracks; + return MIN(nb_sectors, ret); +} + +static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t start_off = -2, prev_end_off = -2; + + *pnum = 0; + while (nb_sectors > 0 || start_off == -2) { + int64_t offset = seek_to_sector(s, sector_num); + int to_end; + + if (start_off == -2) { + start_off = offset; + prev_end_off = offset; + } else if (offset != prev_end_off) { + break; + } + + to_end = cluster_remainder(s, sector_num, nb_sectors); + nb_sectors -= to_end; + sector_num += to_end; + *pnum += to_end; + + if (offset > 0) { + prev_end_off += to_end; + } + } + return start_off; +} + +static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) { - const struct parallels_header *ph = (const void *)buf; + BDRVParallelsState *s = bs->opaque; + uint32_t idx, to_allocate, i; + int64_t pos, space; + + pos = block_status(s, sector_num, nb_sectors, pnum); + if (pos > 0) { + return pos; + } + + idx = sector_num / s->tracks; + if (idx >= s->bat_size) { + return -EINVAL; + } + + to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx; + space = to_allocate * s->tracks; + if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) { + int ret; + space += s->prealloc_size; + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { + ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0); + } else { + ret = bdrv_truncate(bs->file, + (s->data_end + space) << BDRV_SECTOR_BITS); + } + if (ret < 0) { + return ret; + } + } + + for (i = 0; i < to_allocate; i++) { + s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); + s->data_end += s->tracks; + bitmap_set(s->bat_dirty_bmap, + bat_entry_off(idx) / s->bat_dirty_block, 1); + } + + return bat2sect(s, idx) + sector_num % s->tracks; +} + - if (buf_size < HEADER_SIZE) +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block); + unsigned long bit; + + qemu_co_mutex_lock(&s->lock); + + bit = find_first_bit(s->bat_dirty_bmap, size); + while (bit < size) { + uint32_t off = bit * s->bat_dirty_block; + uint32_t to_write = s->bat_dirty_block; + int ret; + + if (off + to_write > s->header_size) { + to_write = s->header_size - off; + } + ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write); + if (ret < 0) { + qemu_co_mutex_unlock(&s->lock); + return ret; + } + bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1); + } + bitmap_zero(s->bat_dirty_bmap, size); + + qemu_co_mutex_unlock(&s->lock); + return 0; +} + + +static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) +{ + BDRVParallelsState *s = bs->opaque; + int64_t offset; + + qemu_co_mutex_lock(&s->lock); + offset = block_status(s, sector_num, nb_sectors, pnum); + qemu_co_mutex_unlock(&s->lock); + + if (offset < 0) { return 0; + } + + return (offset << BDRV_SECTOR_BITS) | + BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; +} + +static coroutine_fn int parallels_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + while (nb_sectors > 0) { + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = allocate_clusters(bs, sector_num, nb_sectors, &n); + qemu_co_mutex_unlock(&s->lock); + if (position < 0) { + ret = (int)position; + break; + } + + nbytes = n << BDRV_SECTOR_BITS; + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); + if (ret < 0) { + break; + } + + nb_sectors -= n; + sector_num += n; + bytes_done += nbytes; + } + + qemu_iovec_destroy(&hd_qiov); + return ret; +} + +static coroutine_fn int parallels_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + while (nb_sectors > 0) { + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = block_status(s, sector_num, nb_sectors, &n); + qemu_co_mutex_unlock(&s->lock); + + nbytes = n << BDRV_SECTOR_BITS; + + if (position < 0) { + qemu_iovec_memset(qiov, bytes_done, 0, nbytes); + } else { + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); + if (ret < 0) { + break; + } + } + + nb_sectors -= n; + sector_num += n; + bytes_done += nbytes; + } + + qemu_iovec_destroy(&hd_qiov); + return ret; +} + + +static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t size, prev_off, high_off; + int ret; + uint32_t i; + bool flush_bat = false; + int cluster_size = s->tracks << BDRV_SECTOR_BITS; + + size = bdrv_getlength(bs->file); + if (size < 0) { + res->check_errors++; + return size; + } + + if (s->header_unclean) { + fprintf(stderr, "%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + /* parallels_close will do the job right */ + res->corruptions_fixed++; + s->header_unclean = false; + } + } + + res->bfi.total_clusters = s->bat_size; + res->bfi.compressed_clusters = 0; /* compression is not supported */ + + high_off = 0; + prev_off = 0; + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + prev_off = 0; + continue; + } + + /* cluster outside the image */ + if (off > size) { + fprintf(stderr, "%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + prev_off = 0; + s->bat_bitmap[i] = 0; + res->corruptions_fixed++; + flush_bat = true; + continue; + } + } + + res->bfi.allocated_clusters++; + if (off > high_off) { + high_off = off; + } + + if (prev_off != 0 && (prev_off + cluster_size) != off) { + res->bfi.fragmented_clusters++; + } + prev_off = off; + } + + if (flush_bat) { + ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size); + if (ret < 0) { + res->check_errors++; + return ret; + } + } + + res->image_end_offset = high_off + cluster_size; + if (size > res->image_end_offset) { + int64_t count; + count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size); + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", + size - res->image_end_offset); + res->leaks += count; + if (fix & BDRV_FIX_LEAKS) { + ret = bdrv_truncate(bs->file, res->image_end_offset); + if (ret < 0) { + res->check_errors++; + return ret; + } + res->leaks_fixed += count; + } + } + + return 0; +} + + +static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) +{ + int64_t total_size, cl_size; + uint8_t tmp[BDRV_SECTOR_SIZE]; + Error *local_err = NULL; + BlockDriverState *file; + uint32_t bat_entries, bat_sectors; + ParallelsHeader header; + int ret; + + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, + DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE); + + ret = bdrv_create_file(filename, opts, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + file = NULL; + ret = bdrv_open(&file, filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + ret = bdrv_truncate(file, 0); + if (ret < 0) { + goto exit; + } + + bat_entries = DIV_ROUND_UP(total_size, cl_size); + bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size); + bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; + + memset(&header, 0, sizeof(header)); + memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); + header.version = cpu_to_le32(HEADER_VERSION); + /* don't care much about geometry, it is not used on image level */ + header.heads = cpu_to_le32(16); + header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32); + header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); + header.bat_entries = cpu_to_le32(bat_entries); + header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); + header.data_off = cpu_to_le32(bat_sectors); + + /* write all the data */ + memset(tmp, 0, sizeof(tmp)); + memcpy(tmp, &header, sizeof(header)); + + ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0); + if (ret < 0) { + goto exit; + } + ret = 0; + +done: + bdrv_unref(file); + return ret; + +exit: + error_setg_errno(errp, -ret, "Failed to create Parallels image"); + goto done; +} + + +static int parallels_probe(const uint8_t *buf, int buf_size, + const char *filename) +{ + const ParallelsHeader *ph = (const void *)buf; + + if (buf_size < sizeof(ParallelsHeader)) { + return 0; + } if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || - !memcmp(ph->magic, HEADER_MAGIC2, 16)) && - (le32_to_cpu(ph->version) == HEADER_VERSION)) + !memcmp(ph->magic, HEADER_MAGIC2, 16)) && + (le32_to_cpu(ph->version) == HEADER_VERSION)) { return 100; + } return 0; } +static int parallels_update_header(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader)); + + if (size > s->header_size) { + size = s->header_size; + } + return bdrv_pwrite_sync(bs->file, 0, s->header, size); +} + static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVParallelsState *s = bs->opaque; - int i; - struct parallels_header ph; - int ret; - - bs->read_only = 1; // no write support yet + ParallelsHeader ph; + int ret, size, i; + QemuOpts *opts = NULL; + Error *local_err = NULL; + char *buf; ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); if (ret < 0) { @@ -115,25 +595,90 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - s->catalog_size = le32_to_cpu(ph.catalog_entries); - if (s->catalog_size > INT_MAX / 4) { + s->bat_size = le32_to_cpu(ph.bat_entries); + if (s->bat_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); ret = -EFBIG; goto fail; } - s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size); - if (s->catalog_size && s->catalog_bitmap == NULL) { + + size = bat_entry_off(s->bat_size); + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); + s->header = qemu_try_blockalign(bs->file, s->header_size); + if (s->header == NULL) { ret = -ENOMEM; goto fail; } + s->data_end = le32_to_cpu(ph.data_off); + if (s->data_end == 0) { + s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + } + if (s->data_end < s->header_size) { + /* there is not enough unused space to fit to block align between BAT + and actual data. We can't avoid read-modify-write... */ + s->header_size = size; + } - ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4); + ret = bdrv_pread(bs->file, 0, s->header, s->header_size); if (ret < 0) { goto fail; } + s->bat_bitmap = (uint32_t *)(s->header + 1); + + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i); + if (off >= s->data_end) { + s->data_end = off + s->tracks; + } + } - for (i = 0; i < s->catalog_size; i++) - le32_to_cpus(&s->catalog_bitmap[i]); + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + /* Image was not closed correctly. The check is mandatory */ + s->header_unclean = true; + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { + error_setg(errp, "parallels: Image was not closed correctly; " + "cannot be opened read/write"); + ret = -EACCES; + goto fail; + } + } + + opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, &local_err); + if (local_err != NULL) { + goto fail_options; + } + + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err != NULL) { + goto fail_options; + } + + s->prealloc_size = + qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err); + g_free(buf); + if (local_err != NULL) { + goto fail_options; + } + if (!bdrv_has_zero_init(bs->file) || + bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) { + s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; + } + + if (flags & BDRV_O_RDWR) { + s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); + ret = parallels_update_header(bs); + if (ret < 0) { + goto fail; + } + } + + s->bat_dirty_block = 4 * getpagesize(); + s->bat_dirty_bmap = + bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); qemu_co_mutex_init(&s->lock); return 0; @@ -142,67 +687,67 @@ fail_format: error_setg(errp, "Image not in Parallels format"); ret = -EINVAL; fail: - g_free(s->catalog_bitmap); + qemu_vfree(s->header); return ret; + +fail_options: + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; } -static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) + +static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; - uint32_t index, offset; - - index = sector_num / s->tracks; - offset = sector_num % s->tracks; - /* not allocated */ - if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0)) - return -1; - return - ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512; -} + if (bs->open_flags & BDRV_O_RDWR) { + s->header->inuse = 0; + parallels_update_header(bs); + } -static int parallels_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - while (nb_sectors > 0) { - int64_t position = seek_to_sector(bs, sector_num); - if (position >= 0) { - if (bdrv_pread(bs->file, position, buf, 512) != 512) - return -1; - } else { - memset(buf, 0, 512); - } - nb_sectors--; - sector_num++; - buf += 512; + if (bs->open_flags & BDRV_O_RDWR) { + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS); } - return 0; -} -static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - int ret; - BDRVParallelsState *s = bs->opaque; - qemu_co_mutex_lock(&s->lock); - ret = parallels_read(bs, sector_num, buf, nb_sectors); - qemu_co_mutex_unlock(&s->lock); - return ret; + g_free(s->bat_dirty_bmap); + qemu_vfree(s->header); } -static void parallels_close(BlockDriverState *bs) -{ - BDRVParallelsState *s = bs->opaque; - g_free(s->catalog_bitmap); -} +static QemuOptsList parallels_create_opts = { + .name = "parallels-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size", + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Parallels image cluster size", + .def_value_str = stringify(DEFAULT_CLUSTER_SIZE), + }, + { /* end of list */ } + } +}; static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, - .bdrv_read = parallels_co_read, .bdrv_close = parallels_close, + .bdrv_co_get_block_status = parallels_co_get_block_status, + .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, + .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, + + .bdrv_create = parallels_create, + .bdrv_check = parallels_check, + .create_opts = ¶llels_create_opts, }; static void bdrv_parallels_init(void) diff --git a/block/raw-posix.c b/block/raw-posix.c index 24d85826c4..2990e954ae 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -301,6 +301,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) { BDRVRawState *s = bs->opaque; char *buf; + size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); /* For /dev/sg devices the alignment is not really used. With buffered I/O, we don't have any restrictions. */ @@ -330,9 +331,9 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) /* If we could not get the sizes so far, we can only guess them */ if (!s->buf_align) { size_t align; - buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE); - for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { - if (raw_is_io_aligned(fd, buf + align, MAX_BLOCKSIZE)) { + buf = qemu_memalign(max_align, 2 * max_align); + for (align = 512; align <= max_align; align <<= 1) { + if (raw_is_io_aligned(fd, buf + align, max_align)) { s->buf_align = align; break; } @@ -342,8 +343,8 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) if (!bs->request_alignment) { size_t align; - buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE); - for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + buf = qemu_memalign(s->buf_align, max_align); + for (align = 512; align <= max_align; align <<= 1) { if (raw_is_io_aligned(fd, buf, align)) { bs->request_alignment = align; break; @@ -725,7 +726,8 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; raw_probe_alignment(bs, s->fd, errp); - bs->bl.opt_mem_alignment = s->buf_align; + bs->bl.min_mem_alignment = s->buf_align; + bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize()); } static int check_for_dasd(int fd) |