aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorPeter Lieven <pl@kamp.de>2014-04-28 13:11:32 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2014-04-29 11:14:25 +0200
commitb03c38057b7ac4ffb60fa98a26dd4c8d5fa9c54c (patch)
tree797d5148c4e75e1dab5063a5ed1ea0c05a47480b /block
parentdbe5c58f2a44f49572b3346400779fac818fcaea (diff)
block/iscsi: speed up read for unallocated sectors
this patch implements a cache that tracks if a page on the iscsi target is allocated or not. The cache is implemented in a way that it allows for false positives (e.g. pretending a page is allocated, but it isn't), but no false negatives. The cached allocation info is then used to speed up the read process for unallocated sectors by issueing a GET_LBA_STATUS request for all sectors that are not yet known to be allocated. If the read request is confirmed to fall into an unallocated range we directly return zeroes and do not transfer the data over the wire. Tests have shown that a relatively small amount of GET_LBA_STATUS requests happens a vServer boot time to fill the allocation cache (all those blocks are not queried again). Not to transfer all the data of unallocated sectors saves a lot of time, bandwidth and storage I/O load during block jobs or storage migration and it saves a lot of bandwidth as well for any big sequential read of the whole disk (e.g. block copy or speed tests) if a significant number of blocks is unallocated. Signed-off-by: Peter Lieven <pl@kamp.de> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/iscsi.c300
1 files changed, 198 insertions, 102 deletions
diff --git a/block/iscsi.c b/block/iscsi.c
index 56f54193c8..6b8a0a34f7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -30,6 +30,8 @@
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
#include "block/block_int.h"
#include "trace.h"
#include "block/scsi.h"
@@ -59,6 +61,8 @@ typedef struct IscsiLun {
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
+ unsigned long *allocationmap;
+ int cluster_sectors;
} IscsiLun;
typedef struct IscsiTask {
@@ -91,6 +95,7 @@ typedef struct IscsiAIOCB {
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES 5
+#define ISCSI_CHECKALLOC_THRES 63
static void
iscsi_bh_cb(void *p)
@@ -273,6 +278,32 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
return 1;
}
+static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ if (iscsilun->allocationmap == NULL) {
+ return;
+ }
+ bitmap_set(iscsilun->allocationmap,
+ sector_num / iscsilun->cluster_sectors,
+ DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+}
+
+static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ int64_t cluster_num, nb_clusters;
+ if (iscsilun->allocationmap == NULL) {
+ return;
+ }
+ cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+ nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+ - cluster_num;
+ if (nb_clusters > 0) {
+ bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+ }
+}
+
static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
@@ -336,9 +367,127 @@ retry:
return -EIO;
}
+ iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+
return 0;
}
+
+static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
+ int64_t sector_num, int nb_sectors)
+{
+ unsigned long size;
+ if (iscsilun->allocationmap == NULL) {
+ return true;
+ }
+ size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ return !(find_next_bit(iscsilun->allocationmap, size,
+ sector_num / iscsilun->cluster_sectors) == size);
+}
+
+
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+
+static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct scsi_get_lba_status *lbas = NULL;
+ struct scsi_lba_status_descriptor *lbasd = NULL;
+ struct IscsiTask iTask;
+ int64_t ret;
+
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
+
+ if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* default to all sectors allocated */
+ ret = BDRV_BLOCK_DATA;
+ ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
+ *pnum = nb_sectors;
+
+ /* LUN does not support logical block provisioning */
+ if (iscsilun->lbpme == 0) {
+ goto out;
+ }
+
+retry:
+ if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
+ sector_qemu2lun(sector_num, iscsilun),
+ 8 + 16, iscsi_co_generic_cb,
+ &iTask) == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.do_retry) {
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
+ iTask.complete = 0;
+ goto retry;
+ }
+
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ /* in case the get_lba_status_callout fails (i.e.
+ * because the device is busy or the cmd is not
+ * supported) we pretend all blocks are allocated
+ * for backwards compatibility */
+ goto out;
+ }
+
+ lbas = scsi_datain_unmarshall(iTask.task);
+ if (lbas == NULL) {
+ ret = -EIO;
+ goto out;
+ }
+
+ lbasd = &lbas->descriptors[0];
+
+ if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
+ ret = -EIO;
+ goto out;
+ }
+
+ *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+
+ if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
+ lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
+ ret &= ~BDRV_BLOCK_DATA;
+ if (iscsilun->lbprz) {
+ ret |= BDRV_BLOCK_ZERO;
+ }
+ }
+
+ if (ret & BDRV_BLOCK_ZERO) {
+ iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+ } else {
+ iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+ }
+
+ if (*pnum > nb_sectors) {
+ *pnum = nb_sectors;
+ }
+out:
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ }
+ return ret;
+}
+
+#endif /* LIBISCSI_FEATURE_IOVECTOR */
+
+
static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
@@ -355,6 +504,22 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
return -EINVAL;
}
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+ if (iscsilun->lbprz && nb_sectors > ISCSI_CHECKALLOC_THRES &&
+ !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+ int64_t ret;
+ int pnum;
+ ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+ qemu_iovec_memset(iov, 0, 0x00, iov->size);
+ return 0;
+ }
+ }
+#endif
+
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
@@ -643,101 +808,6 @@ iscsi_getlength(BlockDriverState *bs)
return len;
}
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
-
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- IscsiLun *iscsilun = bs->opaque;
- struct scsi_get_lba_status *lbas = NULL;
- struct scsi_lba_status_descriptor *lbasd = NULL;
- struct IscsiTask iTask;
- int64_t ret;
-
- iscsi_co_init_iscsitask(iscsilun, &iTask);
-
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- ret = -EINVAL;
- goto out;
- }
-
- /* default to all sectors allocated */
- ret = BDRV_BLOCK_DATA;
- ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
- *pnum = nb_sectors;
-
- /* LUN does not support logical block provisioning */
- if (iscsilun->lbpme == 0) {
- goto out;
- }
-
-retry:
- if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
- sector_qemu2lun(sector_num, iscsilun),
- 8 + 16, iscsi_co_generic_cb,
- &iTask) == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_coroutine_yield();
- }
-
- if (iTask.do_retry) {
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
- }
- iTask.complete = 0;
- goto retry;
- }
-
- if (iTask.status != SCSI_STATUS_GOOD) {
- /* in case the get_lba_status_callout fails (i.e.
- * because the device is busy or the cmd is not
- * supported) we pretend all blocks are allocated
- * for backwards compatibility */
- goto out;
- }
-
- lbas = scsi_datain_unmarshall(iTask.task);
- if (lbas == NULL) {
- ret = -EIO;
- goto out;
- }
-
- lbasd = &lbas->descriptors[0];
-
- if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
- ret = -EIO;
- goto out;
- }
-
- *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
- if (*pnum > nb_sectors) {
- *pnum = nb_sectors;
- }
-
- if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
- lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
- ret &= ~BDRV_BLOCK_DATA;
- if (iscsilun->lbprz) {
- ret |= BDRV_BLOCK_ZERO;
- }
- }
-
-out:
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- }
- return ret;
-}
-
-#endif /* LIBISCSI_FEATURE_IOVECTOR */
-
static int
coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
@@ -791,6 +861,8 @@ retry:
return -EIO;
}
+ iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+
return 0;
}
@@ -865,6 +937,12 @@ retry:
return -EIO;
}
+ if (flags & BDRV_REQ_MAY_UNMAP) {
+ iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ } else {
+ iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ }
+
return 0;
}
@@ -1297,6 +1375,22 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
#endif
+ /* Guess the internal cluster (page) size of the iscsi target by the means
+ * of opt_unmap_gran. Transfer the unmap granularity only if it has a
+ * reasonable size */
+ if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 &&
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
+ iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
+ iscsilun->block_size) >> BDRV_SECTOR_BITS;
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+ if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+ iscsilun->allocationmap =
+ bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+ iscsilun->cluster_sectors));
+ }
+#endif
+ }
+
out:
qemu_opts_del(opts);
if (initiator_name != NULL) {
@@ -1330,6 +1424,7 @@ static void iscsi_close(BlockDriverState *bs)
qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
+ g_free(iscsilun->allocationmap);
memset(iscsilun, 0, sizeof(IscsiLun));
}
@@ -1390,6 +1485,13 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
+ if (iscsilun->allocationmap != NULL) {
+ g_free(iscsilun->allocationmap);
+ iscsilun->allocationmap =
+ bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+ iscsilun->cluster_sectors));
+ }
+
return 0;
}
@@ -1452,13 +1554,7 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
IscsiLun *iscsilun = bs->opaque;
bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
- /* Guess the internal cluster (page) size of the iscsi target by the means
- * of opt_unmap_gran. Transfer the unmap granularity only if it has a
- * reasonable size for bdi->cluster_size */
- if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 &&
- iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
- bdi->cluster_size = iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
- }
+ bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
return 0;
}