https://bugzilla.redhat.com/show_bug.cgi?id=433560 Revised block device address range patch The original patch adds checks to the main bdrv_XXX apis to validate that the I/O operation does not exceed the bounds of the disk - ie beyond the total_sectors count. This works correctly for bdrv_XXX calls from the IDE driver. With disk formats like QCow though, bdrv_XXX is re-entrant, because the QCow driver uses the block APIs for dealing with its underlying file. The problem is that QCow files are grow-on-demand, so writes will *explicitly* be beyond the end of the file. The original patch blocks any I/O operation which would cause the QCow file to grow, resulting it more or less catasatrophic data loss. Basically the bounds checking needs to distinguish between checking for the logical disk extents, vs the physical disk extents. For raw files these are the same so initial tests showed no problems, but for QCow format disks they are different & thus we see a problem What follows is a revised patch which introduces a flag BDRV_O_AUTOGROW which can be passed to bdrv_open to indicate that the files can be allowed to automatically extend their extents. This flag should only be used by internal block drivers such as block-qcow2.c, block-vmdk.c In my testing this has fixed the qcow corruption, and still maintains the goal of Ian's original patch which was to prevent the guest VM writing beyond the logical disk extents. diff -rup kvm-60.orig/qemu/block.c kvm-60.new/qemu/block.c --- kvm-60.orig/qemu/block.c 2008-02-26 18:44:28.000000000 -0500 +++ kvm-60.new/qemu/block.c 2008-02-26 18:44:52.000000000 -0500 @@ -124,6 +124,60 @@ void path_combine(char *dest, int dest_s } } +static int bdrv_rd_badreq_sectors(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + return + nb_sectors < 0 || + sector_num < 0 || + nb_sectors > bs->total_sectors || + sector_num > bs->total_sectors - nb_sectors; +} + +static int bdrv_rd_badreq_bytes(BlockDriverState *bs, + int64_t offset, int count) +{ + int64_t size = bs->total_sectors << SECTOR_BITS; + return + count < 0 || + size < 0 || + count > size || + offset > size - count; +} + +static int bdrv_wr_badreq_sectors(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + if (sector_num < 0 || + nb_sectors < 0) + return 1; + + if (sector_num > bs->total_sectors - nb_sectors) { + if (bs->autogrow) + bs->total_sectors = sector_num + nb_sectors; + else + return 1; + } + return 0; +} + +static int bdrv_wr_badreq_bytes(BlockDriverState *bs, + int64_t offset, int count) +{ + int64_t size = bs->total_sectors << SECTOR_BITS; + if (count < 0 || + offset < 0) + return 1; + + if (offset > size - count) { + if (bs->autogrow) + bs->total_sectors = (offset + count + SECTOR_SIZE - 1) >> SECTOR_BITS; + else + return 1; + } + return 0; +} + static void bdrv_register(BlockDriver *bdrv) { @@ -332,6 +386,10 @@ int bdrv_open2(BlockDriverState *bs, con bs->read_only = 0; bs->is_temporary = 0; bs->encrypted = 0; + bs->autogrow = 0; + + if (flags & BDRV_O_AUTOGROW) + bs->autogrow = 1; if (flags & BDRV_O_SNAPSHOT) { BlockDriverState *bs1; @@ -376,6 +434,7 @@ int bdrv_open2(BlockDriverState *bs, con } bs->drv = drv; bs->opaque = qemu_mallocz(drv->instance_size); + bs->total_sectors = 0; /* driver will set if it does not do getlength */ if (bs->opaque == NULL && drv->instance_size > 0) return -1; /* Note: for compatibility, we open disk image files as RDWR, and @@ -441,6 +500,7 @@ void bdrv_close(BlockDriverState *bs) bs->drv = NULL; /* call the change callback */ + bs->total_sectors = 0; bs->media_changed = 1; if (bs->change_cb) bs->change_cb(bs->change_opaque); @@ -506,6 +566,8 @@ int bdrv_read(BlockDriverState *bs, int6 if (!drv) return -ENOMEDIUM; + if (bdrv_rd_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(buf, bs->boot_sector_data, 512); sector_num++; @@ -546,6 +608,8 @@ int bdrv_write(BlockDriverState *bs, int return -ENOMEDIUM; if (bs->read_only) return -EACCES; + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(bs->boot_sector_data, buf, 512); } @@ -671,6 +735,8 @@ int bdrv_pread(BlockDriverState *bs, int return -ENOMEDIUM; if (!drv->bdrv_pread) return bdrv_pread_em(bs, offset, buf1, count1); + if (bdrv_rd_badreq_bytes(bs, offset, count1)) + return -EDOM; return drv->bdrv_pread(bs, offset, buf1, count1); } @@ -686,6 +752,8 @@ int bdrv_pwrite(BlockDriverState *bs, in return -ENOMEDIUM; if (!drv->bdrv_pwrite) return bdrv_pwrite_em(bs, offset, buf1, count1); + if (bdrv_wr_badreq_bytes(bs, offset, count1)) + return -EDOM; return drv->bdrv_pwrite(bs, offset, buf1, count1); } @@ -1091,6 +1159,8 @@ int bdrv_write_compressed(BlockDriverSta return -ENOMEDIUM; if (!drv->bdrv_write_compressed) return -ENOTSUP; + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors)) + return -EDOM; return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -1237,6 +1307,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri if (!drv) return NULL; + if (bdrv_rd_badreq_sectors(bs, sector_num, nb_sectors)) + return NULL; /* XXX: we assume that nb_sectors == 0 is suppored by the async read */ if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { @@ -1268,6 +1340,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr return NULL; if (bs->read_only) return NULL; + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors)) + return NULL; if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) { memcpy(bs->boot_sector_data, buf, 512); } diff -rup kvm-60.orig/qemu/block.h kvm-60.new/qemu/block.h --- kvm-60.orig/qemu/block.h 2008-01-20 07:35:04.000000000 -0500 +++ kvm-60.new/qemu/block.h 2008-02-26 18:44:52.000000000 -0500 @@ -45,6 +45,7 @@ typedef struct QEMUSnapshotInfo { it (default for bdrv_file_open()) */ #define BDRV_O_DIRECT 0x0020 +#define BDRV_O_AUTOGROW 0x0040 /* Allow backing file to extend when writing past end of file */ #ifndef QEMU_IMG void bdrv_info(void); diff -rup kvm-60.orig/qemu/block_int.h kvm-60.new/qemu/block_int.h --- kvm-60.orig/qemu/block_int.h 2008-01-20 07:35:04.000000000 -0500 +++ kvm-60.new/qemu/block_int.h 2008-02-26 18:44:52.000000000 -0500 @@ -97,6 +97,7 @@ struct BlockDriverState { int locked; /* if true, the media cannot temporarily be ejected */ int encrypted; /* if true, the media is encrypted */ int sg; /* if true, the device is a /dev/sg* */ + int autogrow; /* if true, the backing store can auto-extend to allocate new extents */ /* event callback when inserting/removing */ void (*change_cb)(void *opaque); void *change_opaque; diff -rup kvm-60.orig/qemu/block-qcow2.c kvm-60.new/qemu/block-qcow2.c --- kvm-60.orig/qemu/block-qcow2.c 2008-01-20 07:35:04.000000000 -0500 +++ kvm-60.new/qemu/block-qcow2.c 2008-02-26 18:44:52.000000000 -0500 @@ -191,7 +191,7 @@ static int qcow_open(BlockDriverState *b int len, i, shift, ret; QCowHeader header; - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) diff -rup kvm-60.orig/qemu/block-qcow.c kvm-60.new/qemu/block-qcow.c --- kvm-60.orig/qemu/block-qcow.c 2008-01-20 07:35:04.000000000 -0500 +++ kvm-60.new/qemu/block-qcow.c 2008-02-26 18:44:52.000000000 -0500 @@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b int len, i, shift, ret; QCowHeader header; - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header)) diff -rup kvm-60.orig/qemu/block-vmdk.c kvm-60.new/qemu/block-vmdk.c --- kvm-60.orig/qemu/block-vmdk.c 2008-01-20 07:35:04.000000000 -0500 +++ kvm-60.new/qemu/block-vmdk.c 2008-02-26 18:44:52.000000000 -0500 @@ -375,7 +375,7 @@ static int vmdk_open(BlockDriverState *b flags = BDRV_O_RDONLY; fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename); - ret = bdrv_file_open(&s->hd, filename, flags); + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW); if (ret < 0) return ret; if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))