1 https://bugzilla.redhat.com/show_bug.cgi?id=433560
3 Revised block device address range patch
5 The original patch adds checks to the main bdrv_XXX apis to validate that
6 the I/O operation does not exceed the bounds of the disk - ie beyond the
7 total_sectors count. This works correctly for bdrv_XXX calls from the IDE
8 driver. With disk formats like QCow though, bdrv_XXX is re-entrant,
9 because the QCow driver uses the block APIs for dealing with its underlying
10 file. The problem is that QCow files are grow-on-demand, so writes will
11 *explicitly* be beyond the end of the file. The original patch blocks any
12 I/O operation which would cause the QCow file to grow, resulting it more
13 or less catasatrophic data loss.
15 Basically the bounds checking needs to distinguish between checking for
16 the logical disk extents, vs the physical disk extents. For raw files
17 these are the same so initial tests showed no problems, but for QCow
18 format disks they are different & thus we see a problem
20 What follows is a revised patch which introduces a flag BDRV_O_AUTOGROW
21 which can be passed to bdrv_open to indicate that the files can be allowed
22 to automatically extend their extents. This flag should only be used by
23 internal block drivers such as block-qcow2.c, block-vmdk.c In my testing
24 this has fixed the qcow corruption, and still maintains the goal of Ian's
25 original patch which was to prevent the guest VM writing beyond the logical
28 diff -rup kvm-60.orig/qemu/block.c kvm-60.new/qemu/block.c
29 --- kvm-60.orig/qemu/block.c 2008-02-26 18:44:28.000000000 -0500
30 +++ kvm-60.new/qemu/block.c 2008-02-26 18:44:52.000000000 -0500
31 @@ -124,6 +124,60 @@ void path_combine(char *dest, int dest_s
35 +static int bdrv_rd_badreq_sectors(BlockDriverState *bs,
36 + int64_t sector_num, int nb_sectors)
41 + nb_sectors > bs->total_sectors ||
42 + sector_num > bs->total_sectors - nb_sectors;
45 +static int bdrv_rd_badreq_bytes(BlockDriverState *bs,
46 + int64_t offset, int count)
48 + int64_t size = bs->total_sectors << SECTOR_BITS;
53 + offset > size - count;
56 +static int bdrv_wr_badreq_sectors(BlockDriverState *bs,
57 + int64_t sector_num, int nb_sectors)
59 + if (sector_num < 0 ||
63 + if (sector_num > bs->total_sectors - nb_sectors) {
65 + bs->total_sectors = sector_num + nb_sectors;
72 +static int bdrv_wr_badreq_bytes(BlockDriverState *bs,
73 + int64_t offset, int count)
75 + int64_t size = bs->total_sectors << SECTOR_BITS;
80 + if (offset > size - count) {
82 + bs->total_sectors = (offset + count + SECTOR_SIZE - 1) >> SECTOR_BITS;
90 static void bdrv_register(BlockDriver *bdrv)
92 @@ -332,6 +386,10 @@ int bdrv_open2(BlockDriverState *bs, con
98 + if (flags & BDRV_O_AUTOGROW)
101 if (flags & BDRV_O_SNAPSHOT) {
102 BlockDriverState *bs1;
103 @@ -376,6 +434,7 @@ int bdrv_open2(BlockDriverState *bs, con
106 bs->opaque = qemu_mallocz(drv->instance_size);
107 + bs->total_sectors = 0; /* driver will set if it does not do getlength */
108 if (bs->opaque == NULL && drv->instance_size > 0)
110 /* Note: for compatibility, we open disk image files as RDWR, and
111 @@ -441,6 +500,7 @@ void bdrv_close(BlockDriverState *bs)
114 /* call the change callback */
115 + bs->total_sectors = 0;
116 bs->media_changed = 1;
118 bs->change_cb(bs->change_opaque);
119 @@ -506,6 +566,8 @@ int bdrv_read(BlockDriverState *bs, int6
123 + if (bdrv_rd_badreq_sectors(bs, sector_num, nb_sectors))
125 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
126 memcpy(buf, bs->boot_sector_data, 512);
128 @@ -546,6 +608,8 @@ int bdrv_write(BlockDriverState *bs, int
132 + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors))
134 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
135 memcpy(bs->boot_sector_data, buf, 512);
137 @@ -671,6 +735,8 @@ int bdrv_pread(BlockDriverState *bs, int
139 if (!drv->bdrv_pread)
140 return bdrv_pread_em(bs, offset, buf1, count1);
141 + if (bdrv_rd_badreq_bytes(bs, offset, count1))
143 return drv->bdrv_pread(bs, offset, buf1, count1);
146 @@ -686,6 +752,8 @@ int bdrv_pwrite(BlockDriverState *bs, in
148 if (!drv->bdrv_pwrite)
149 return bdrv_pwrite_em(bs, offset, buf1, count1);
150 + if (bdrv_wr_badreq_bytes(bs, offset, count1))
152 return drv->bdrv_pwrite(bs, offset, buf1, count1);
155 @@ -1091,6 +1159,8 @@ int bdrv_write_compressed(BlockDriverSta
157 if (!drv->bdrv_write_compressed)
159 + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors))
161 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
164 @@ -1237,6 +1307,8 @@ BlockDriverAIOCB *bdrv_aio_read(BlockDri
168 + if (bdrv_rd_badreq_sectors(bs, sector_num, nb_sectors))
171 /* XXX: we assume that nb_sectors == 0 is suppored by the async read */
172 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
173 @@ -1268,6 +1340,8 @@ BlockDriverAIOCB *bdrv_aio_write(BlockDr
177 + if (bdrv_wr_badreq_sectors(bs, sector_num, nb_sectors))
179 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
180 memcpy(bs->boot_sector_data, buf, 512);
182 diff -rup kvm-60.orig/qemu/block.h kvm-60.new/qemu/block.h
183 --- kvm-60.orig/qemu/block.h 2008-01-20 07:35:04.000000000 -0500
184 +++ kvm-60.new/qemu/block.h 2008-02-26 18:44:52.000000000 -0500
185 @@ -45,6 +45,7 @@ typedef struct QEMUSnapshotInfo {
188 #define BDRV_O_DIRECT 0x0020
189 +#define BDRV_O_AUTOGROW 0x0040 /* Allow backing file to extend when writing past end of file */
192 void bdrv_info(void);
193 diff -rup kvm-60.orig/qemu/block_int.h kvm-60.new/qemu/block_int.h
194 --- kvm-60.orig/qemu/block_int.h 2008-01-20 07:35:04.000000000 -0500
195 +++ kvm-60.new/qemu/block_int.h 2008-02-26 18:44:52.000000000 -0500
196 @@ -97,6 +97,7 @@ struct BlockDriverState {
197 int locked; /* if true, the media cannot temporarily be ejected */
198 int encrypted; /* if true, the media is encrypted */
199 int sg; /* if true, the device is a /dev/sg* */
200 + int autogrow; /* if true, the backing store can auto-extend to allocate new extents */
201 /* event callback when inserting/removing */
202 void (*change_cb)(void *opaque);
204 diff -rup kvm-60.orig/qemu/block-qcow2.c kvm-60.new/qemu/block-qcow2.c
205 --- kvm-60.orig/qemu/block-qcow2.c 2008-01-20 07:35:04.000000000 -0500
206 +++ kvm-60.new/qemu/block-qcow2.c 2008-02-26 18:44:52.000000000 -0500
207 @@ -191,7 +191,7 @@ static int qcow_open(BlockDriverState *b
208 int len, i, shift, ret;
211 - ret = bdrv_file_open(&s->hd, filename, flags);
212 + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW);
215 if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
216 diff -rup kvm-60.orig/qemu/block-qcow.c kvm-60.new/qemu/block-qcow.c
217 --- kvm-60.orig/qemu/block-qcow.c 2008-01-20 07:35:04.000000000 -0500
218 +++ kvm-60.new/qemu/block-qcow.c 2008-02-26 18:44:52.000000000 -0500
219 @@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b
220 int len, i, shift, ret;
223 - ret = bdrv_file_open(&s->hd, filename, flags);
224 + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW);
227 if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
228 diff -rup kvm-60.orig/qemu/block-vmdk.c kvm-60.new/qemu/block-vmdk.c
229 --- kvm-60.orig/qemu/block-vmdk.c 2008-01-20 07:35:04.000000000 -0500
230 +++ kvm-60.new/qemu/block-vmdk.c 2008-02-26 18:44:52.000000000 -0500
231 @@ -375,7 +375,7 @@ static int vmdk_open(BlockDriverState *b
232 flags = BDRV_O_RDONLY;
233 fprintf(stderr, "(VMDK) image open: flags=0x%x filename=%s\n", flags, bs->filename);
235 - ret = bdrv_file_open(&s->hd, filename, flags);
236 + ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_AUTOGROW);
239 if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))