]>
Commit | Line | Data |
---|---|---|
e15b74d6 | 1 | diff -Nru linux/drivers/char/Makefile linux.new/drivers/char/Makefile |
2 | --- linux/drivers/char/Makefile Thu Jan 4 06:58:46 2001 | |
3 | +++ linux.new/drivers/char/Makefile Thu Jan 4 06:50:46 2001 | |
4 | @@ -20,7 +20,7 @@ | |
5 | ||
6 | O_TARGET := char.o | |
7 | M_OBJS := | |
8 | -O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o | |
9 | +O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o | |
10 | OX_OBJS := pty.o misc.o | |
11 | obj-y := | |
12 | obj-m := | |
13 | diff -Nru linux/drivers/char/mem.c linux.new/drivers/char/mem.c | |
14 | --- linux/drivers/char/mem.c Thu Jan 4 06:58:46 2001 | |
15 | +++ linux.new/drivers/char/mem.c Thu Jan 4 06:59:49 2001 | |
16 | @@ -17,6 +17,7 @@ | |
17 | #include <linux/joystick.h> | |
18 | #include <linux/i2c.h> | |
19 | #include <linux/capability.h> | |
20 | +#include <linux/raw.h> | |
21 | ||
22 | #include <asm/uaccess.h> | |
23 | #include <asm/io.h> | |
24 | @@ -608,6 +609,7 @@ | |
25 | if (register_chrdev(MEM_MAJOR,"mem",&memory_fops)) | |
26 | printk("unable to get major %d for memory devs\n", MEM_MAJOR); | |
27 | rand_initialize(); | |
28 | + raw_init(); | |
29 | #if defined (CONFIG_FB) | |
30 | fbmem_init(); | |
31 | #endif | |
32 | diff -Nru linux/drivers/char/raw.c linux.new/drivers/char/raw.c | |
33 | --- linux/drivers/char/raw.c Thu Jan 1 01:00:00 1970 | |
34 | +++ linux.new/drivers/char/raw.c Thu Jan 4 06:50:46 2001 | |
35 | @@ -0,0 +1,387 @@ | |
36 | +/* | |
37 | + * linux/drivers/char/raw.c | |
38 | + * | |
39 | + * Front-end raw character devices. These can be bound to any block | |
40 | + * devices to provide genuine Unix raw character device semantics. | |
41 | + * | |
42 | + * We reserve minor number 0 for a control interface. ioctl()s on this | |
43 | + * device are used to bind the other minor numbers to block devices. | |
44 | + */ | |
45 | + | |
46 | +#include <linux/fs.h> | |
47 | +#include <linux/iobuf.h> | |
48 | +#include <linux/major.h> | |
49 | +#include <linux/blkdev.h> | |
50 | +#include <linux/raw.h> | |
51 | +#include <asm/uaccess.h> | |
52 | + | |
53 | +#define dprintk(x...) | |
54 | + | |
55 | +static kdev_t raw_device_bindings[256] = {}; /* block device bound to each raw minor; NODEV means unbound */ | |
56 | +static int raw_device_inuse[256] = {}; /* open count per raw minor (bumped in raw_open, dropped in raw_release) */ | |
57 | +static int raw_device_sector_size[256] = {}; /* IO unit in bytes, chosen by the first opener of the minor */ | |
58 | +static int raw_device_sector_bits[256] = {}; /* number of low zero bits in the sector size, used as a shift count */ | |
59 | + | |
60 | +extern struct file_operations * get_blkfops(unsigned int major); | |
61 | + | |
62 | +static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *); | |
63 | + | |
64 | +ssize_t raw_read(struct file *, char *, size_t, loff_t *); | |
65 | +ssize_t raw_write(struct file *, const char *, size_t, loff_t *); | |
66 | +int raw_open(struct inode *, struct file *); | |
67 | +int raw_release(struct inode *, struct file *); | |
68 | +int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long); | |
69 | + | |
70 | + | |
71 | +static struct file_operations raw_fops = { /* operations registered for RAW_MAJOR by raw_init() */ | |
72 | + NULL, /* llseek */ | |
73 | + raw_read, /* read */ | |
74 | + raw_write, /* write */ | |
75 | + NULL, /* readdir */ | |
76 | + NULL, /* poll */ | |
77 | + NULL, /* ioctl */ | |
78 | + NULL, /* mmap */ | |
79 | + raw_open, /* open: for minor 0 this swaps filp->f_op to raw_ctl_fops */ | |
80 | + NULL, /* flush */ | |
81 | + raw_release, /* release */ | |
82 | + NULL /* fsync */ | |
83 | +}; | |
84 | + | |
85 | +static struct file_operations raw_ctl_fops = { /* installed by raw_open() when the control device (minor 0) is opened */ | |
86 | + NULL, /* llseek */ | |
87 | + NULL, /* read */ | |
88 | + NULL, /* write */ | |
89 | + NULL, /* readdir */ | |
90 | + NULL, /* poll */ | |
91 | + raw_ctl_ioctl, /* ioctl: RAW_SETBIND / RAW_GETBIND */ | |
92 | + NULL, /* mmap */ | |
93 | + raw_open, /* open */ | |
94 | + NULL, /* flush */ | |
95 | + NULL, /* no special release code */ | |
96 | + NULL /* fsync */ | |
97 | +}; | |
98 | + | |
99 | + | |
100 | + | |
101 | +void __init raw_init(void) /* Register the raw character driver under RAW_MAJOR with raw_fops. */ | |
102 | +{ | |
103 | + register_chrdev(RAW_MAJOR, "raw", &raw_fops); /* NOTE(review): result is ignored, so a failed registration goes unreported — consider checking it */ | |
104 | +} | |
105 | + | |
106 | + | |
107 | +/* | |
108 | + * The raw IO open and release code needs to fake appropriate | |
109 | + * open/release calls to the underlying block devices. | |
110 | + */ | |
111 | + | |
112 | +static int bdev_open(kdev_t dev, int mode) /* Fake an open of block device 'dev' with file mode 'mode'. Returns -ENOMEM, -ENODEV, or the driver open method's result (0 if it has none). */ | |
113 | +{ | |
114 | + int err = 0; | |
115 | + struct file dummy_file = {}; /* on-stack stand-in handed to the driver's open method */ | |
116 | + struct dentry dummy_dentry = {}; /* links the dummy file to the scratch inode */ | |
117 | + struct inode * inode = get_empty_inode(); /* scratch inode that carries the device number */ | |
118 | + | |
119 | + if (!inode) | |
120 | + return -ENOMEM; | |
121 | + | |
122 | + dummy_file.f_op = get_blkfops(MAJOR(dev)); | |
123 | + if (!dummy_file.f_op) { | |
124 | + err = -ENODEV; | |
125 | + goto done; | |
126 | + } | |
127 | + | |
128 | + if (dummy_file.f_op->open) { /* a driver without an open method needs no call */ | |
129 | + inode->i_rdev = dev; | |
130 | + dummy_dentry.d_inode = inode; | |
131 | + dummy_file.f_dentry = &dummy_dentry; | |
132 | + dummy_file.f_mode = mode; | |
133 | + err = dummy_file.f_op->open(inode, &dummy_file); | |
134 | + } | |
135 | + | |
136 | + done: | |
137 | + iput(inode); /* the scratch inode is dropped on every path, success included */ | |
138 | + return err; | |
139 | +} | |
140 | + | |
141 | +static int bdev_close(kdev_t dev) /* Fake a release of block device 'dev'; returns -ENOMEM or blkdev_release()'s result. */ | |
142 | +{ | |
143 | + struct inode *scratch = get_empty_inode(); /* throwaway inode carrying the device number */ | |
144 | + int status = -ENOMEM; | |
145 | + | |
146 | + if (scratch) { | |
147 | + scratch->i_rdev = dev; | |
148 | + status = blkdev_release(scratch); | |
149 | + iput(scratch); | |
150 | + } | |
151 | + | |
152 | + return status; | |
153 | +} | |
154 | + | |
155 | + | |
156 | + | |
157 | +/* | |
158 | + * Open/close code for raw IO. | |
159 | + */ | |
160 | + | |
161 | +int raw_open(struct inode *inode, struct file *filp) /* Open a raw minor: minor 0 becomes the control device, any other minor opens its bound block device. Returns 0 or a negative errno. */ | |
162 | +{ | |
163 | + int minor; | |
164 | + kdev_t bdev; | |
165 | + int err; | |
166 | + int sector_size; | |
167 | + int sector_bits; | |
168 | + | |
169 | + minor = MINOR(inode->i_rdev); | |
170 | + | |
171 | + /* | |
172 | + * Is it the control device? | |
173 | + */ | |
174 | + | |
175 | + if (minor == 0) { | |
176 | + filp->f_op = &raw_ctl_fops; | |
177 | + return 0; | |
178 | + } | |
179 | + | |
180 | + /* | |
181 | + * No, it is a normal raw device. All we need to do on open is | |
182 | + * to check that the device is bound, and force the underlying | |
183 | + * block device to a sector-size blocksize. | |
184 | + */ | |
185 | + | |
186 | + bdev = raw_device_bindings[minor]; | |
187 | + if (bdev == NODEV) | |
188 | + return -ENODEV; | |
189 | + | |
190 | + err = bdev_open(bdev, filp->f_mode); | |
191 | + if (err) | |
192 | + return err; | |
193 | + | |
194 | + /* | |
195 | + * Don't change the blocksize if we already have users using | |
196 | + * this device | |
197 | + */ | |
198 | + | |
199 | + if (raw_device_inuse[minor]++) /* post-increment: only the first opener falls through to the setup below */ | |
200 | + return 0; | |
201 | + | |
202 | + /* | |
203 | + * Don't interfere with mounted devices: we cannot safely set | |
204 | + * the blocksize on a device which is already mounted. | |
205 | + */ | |
206 | + | |
207 | + sector_size = 512; /* default when the driver publishes no size table */ | |
208 | + if (lookup_vfsmnt(bdev) != NULL) { | |
209 | + if (blksize_size[MAJOR(bdev)]) | |
210 | + sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)]; | |
211 | + } else { | |
212 | + if (hardsect_size[MAJOR(bdev)]) | |
213 | + sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)]; | |
214 | + } | |
215 | + | |
216 | + set_blocksize(bdev, sector_size); /* NOTE(review): a zero table entry reaches here unchecked and would make the loop below spin forever — confirm entries are never 0 */ | |
217 | + raw_device_sector_size[minor] = sector_size; | |
218 | + | |
219 | + for (sector_bits = 0; !(sector_size & 1); ) /* count low zero bits to get the shift for this sector size */ | |
220 | + sector_size>>=1, sector_bits++; | |
221 | + raw_device_sector_bits[minor] = sector_bits; | |
222 | + | |
223 | + return 0; | |
224 | +} | |
225 | + | |
226 | +int raw_release(struct inode *inode, struct file *filp) /* Close a bound raw minor; always returns 0. */ | |
227 | +{ | |
228 | + int slot = MINOR(inode->i_rdev); | |
229 | + | |
230 | + /* Balance the fake open done in raw_open(); the close result is not propagated. */ | |
231 | + bdev_close(raw_device_bindings[slot]); | |
232 | + | |
233 | + /* One user fewer on this minor. */ | |
234 | + raw_device_inuse[slot]--; | |
235 | + return 0; | |
236 | +} | |
237 | + | |
238 | + | |
239 | + | |
240 | +/* | |
241 | + * ioctl handler for the control device: RAW_SETBIND binds raw minor rq.raw_minor to block device (rq.block_major, rq.block_minor); RAW_GETBIND reports the current binding through the same struct at 'arg'. | |
242 | + * Returns 0 on success, -EFAULT if the request cannot be copied from/to user space, -EINVAL for out-of-range numbers or unknown commands, -EBUSY if the raw minor is currently open. | |
243 | + */ | |
244 | + | |
245 | +int raw_ctl_ioctl(struct inode *inode, | |
246 | + struct file *flip, | |
247 | + unsigned int command, | |
248 | + unsigned long arg) | |
249 | +{ | |
250 | + struct raw_config_request rq; | |
251 | + int err = 0; | |
252 | + int minor; | |
253 | + | |
254 | + switch (command) { | |
255 | + case RAW_SETBIND: | |
256 | + case RAW_GETBIND: | |
257 | + | |
258 | + /* First, find out which raw minor we want. copy_from_user() returns a count of uncopied bytes, so map any failure to -EFAULT. */ | |
259 | + | |
260 | + err = copy_from_user(&rq, (void *) arg, sizeof(rq)) ? -EFAULT : 0; | |
261 | + if (err) | |
262 | + break; | |
263 | + | |
264 | + minor = rq.raw_minor; | |
265 | + if (minor <= 0 || minor > MINORMASK) { /* raw_minor is a signed int from user space: negatives must not index the tables; 0 is the control device */ | |
266 | + err = -EINVAL; | |
267 | + break; | |
268 | + } | |
269 | + | |
270 | + if (command == RAW_SETBIND) { | |
271 | + /* | |
272 | + * For now, we don't need to check that the underlying | |
273 | + * block device is present or not: we can do that when | |
274 | + * the raw device is opened. Just check that the | |
275 | + * major/minor numbers make sense. | |
276 | + */ | |
277 | + | |
278 | + if (rq.block_major == NODEV || | |
279 | + rq.block_major > MAX_BLKDEV || | |
280 | + rq.block_minor > MINORMASK) { | |
281 | + err = -EINVAL; | |
282 | + break; | |
283 | + } | |
284 | + | |
285 | + if (raw_device_inuse[minor]) { /* refuse to rebind a minor that someone has open */ | |
286 | + err = -EBUSY; | |
287 | + break; | |
288 | + } | |
289 | + raw_device_bindings[minor] = | |
290 | + MKDEV(rq.block_major, rq.block_minor); | |
291 | + } else { | |
292 | + rq.block_major = MAJOR(raw_device_bindings[minor]); | |
293 | + rq.block_minor = MINOR(raw_device_bindings[minor]); | |
294 | + err = copy_to_user((void *) arg, &rq, sizeof(rq)) ? -EFAULT : 0; /* same byte-count-to-errno mapping as above */ | |
295 | + } | |
296 | + break; | |
297 | + | |
298 | + default: | |
299 | + err = -EINVAL; | |
300 | + } | |
301 | + | |
302 | + return err; | |
303 | +} | |
304 | + | |
305 | + | |
306 | + | |
307 | +ssize_t raw_read(struct file *filp, char * buf, | |
308 | + size_t size, loff_t *offp) | |
309 | +{ | |
310 | + return rw_raw_dev(READ, filp, buf, size, offp); /* read entry point: shared transfer path, direction READ */ | |
311 | +} | |
312 | + | |
313 | +ssize_t raw_write(struct file *filp, const char *buf, | |
314 | + size_t size, loff_t *offp) | |
315 | +{ | |
316 | + return rw_raw_dev(WRITE, filp, (char *) buf, size, offp); /* write entry point: const is cast away only to share rw_raw_dev's signature */ | |
317 | +} | |
318 | + | |
319 | +#define SECTOR_BITS 9 /* NOTE(review): these three macros are not referenced by rw_raw_dev below, which uses the per-minor sector values instead */ | |
320 | +#define SECTOR_SIZE (1U << SECTOR_BITS) | |
321 | +#define SECTOR_MASK (SECTOR_SIZE - 1) | |
322 | + | |
323 | +ssize_t rw_raw_dev(int rw, struct file *filp, char *buf, /* Common read/write path: transfers 'size' bytes between user buffer 'buf' and the bound block device at *offp. */ | |
324 | + size_t size, loff_t *offp) /* Returns bytes transferred (advancing *offp), 0, or a negative errno when nothing was transferred. */ | |
325 | +{ | |
326 | + struct kiobuf * iobuf; | |
327 | + int err; | |
328 | + unsigned long blocknr, blocks; | |
329 | + unsigned long b[KIO_MAX_SECTORS]; /* block numbers for one chunk, passed to brw_kiovec */ | |
330 | + size_t transferred; | |
331 | + int iosize; | |
332 | + int i; | |
333 | + int minor; | |
334 | + kdev_t dev; | |
335 | + unsigned long limit; | |
336 | + | |
337 | + int sector_size, sector_bits, sector_mask; | |
338 | + int max_sectors; | |
339 | + | |
340 | + /* | |
341 | + * First, a few checks on device size limits | |
342 | + */ | |
343 | + | |
344 | + minor = MINOR(filp->f_dentry->d_inode->i_rdev); | |
345 | + dev = raw_device_bindings[minor]; | |
346 | + sector_size = raw_device_sector_size[minor]; | |
347 | + sector_bits = raw_device_sector_bits[minor]; | |
348 | + sector_mask = sector_size- 1; | |
349 | + max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9); /* KIO_MAX_SECTORS counts 512-byte units; rescale to this device's sector size */ | |
350 | + | |
351 | + if (blk_size[MAJOR(dev)]) | |
352 | + limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits; /* device size from blk_size[], rescaled to a sector count */ | |
353 | + else | |
354 | + limit = INT_MAX; | |
355 | + dprintk ("rw_raw_dev: dev %d:%d (+%d)\n", | |
356 | + MAJOR(dev), MINOR(dev), limit); | |
357 | + | |
358 | + if ((*offp & sector_mask) || (size & sector_mask)) /* offset and length must both be whole sectors */ | |
359 | + return -EINVAL; | |
360 | + if ((*offp >> sector_bits) >= limit) { /* starting at or past the end of the device */ | |
361 | + if (size) | |
362 | + return -ENXIO; | |
363 | + return 0; | |
364 | + } | |
365 | + | |
366 | + /* | |
367 | + * We'll just use one kiobuf | |
368 | + */ | |
369 | + | |
370 | + err = alloc_kiovec(1, &iobuf); | |
371 | + if (err) | |
372 | + return err; | |
373 | + | |
374 | + /* | |
375 | + * Split the IO into KIO_MAX_SECTORS chunks, mapping and | |
376 | + * unmapping the single kiobuf as we go to perform each chunk of | |
377 | + * IO. | |
378 | + */ | |
379 | + | |
380 | + transferred = 0; | |
381 | + blocknr = *offp >> sector_bits; | |
382 | + while (size > 0) { | |
383 | + blocks = size >> sector_bits; | |
384 | + if (blocks > max_sectors) | |
385 | + blocks = max_sectors; | |
386 | + if (blocks > limit - blocknr) /* clip the chunk at the end of the device */ | |
387 | + blocks = limit - blocknr; | |
388 | + if (!blocks) | |
389 | + break; | |
390 | + | |
391 | + iosize = blocks << sector_bits; | |
392 | + | |
393 | + err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize); | |
394 | + if (err) | |
395 | + break; | |
396 | + | |
397 | + for (i=0; i < blocks; i++) /* consecutive sector numbers for this chunk */ | |
398 | + b[i] = blocknr++; | |
399 | + | |
400 | + err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size, 0); | |
401 | + | |
402 | + if (err >= 0) { | |
403 | + transferred += err; | |
404 | + size -= err; | |
405 | + buf += err; | |
406 | + } | |
407 | + | |
408 | + unmap_kiobuf(iobuf); | |
409 | + | |
410 | + if (err != iosize) /* stop on an error or a short transfer */ | |
411 | + break; | |
412 | + } | |
413 | + | |
414 | + free_kiovec(1, &iobuf); | |
415 | + | |
416 | + if (transferred) { /* partial progress is reported in preference to a later error */ | |
417 | + *offp += transferred; | |
418 | + return transferred; | |
419 | + } | |
420 | + | |
421 | + return err; | |
422 | +} | |
423 | diff -Nru linux/fs/Makefile linux.new/fs/Makefile | |
424 | --- linux/fs/Makefile Thu Jan 4 06:58:46 2001 | |
425 | +++ linux.new/fs/Makefile Thu Jan 4 06:50:46 2001 | |
426 | @@ -13,7 +13,7 @@ | |
427 | O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \ | |
428 | super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | |
429 | ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \ | |
430 | - dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS) | |
431 | + dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS) | |
432 | ||
433 | MOD_LIST_NAME := FS_MODULES | |
434 | ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \ | |
435 | diff -Nru linux/fs/buffer.c linux.new/fs/buffer.c | |
436 | --- linux/fs/buffer.c Thu Jan 4 06:58:46 2001 | |
437 | +++ linux.new/fs/buffer.c Thu Jan 4 06:50:46 2001 | |
438 | @@ -43,6 +43,7 @@ | |
439 | #include <linux/file.h> | |
440 | #include <linux/init.h> | |
441 | #include <linux/quotaops.h> | |
442 | +#include <linux/iobuf.h> | |
443 | ||
444 | #include <asm/uaccess.h> | |
445 | #include <asm/io.h> | |
446 | @@ -1259,6 +1260,180 @@ | |
447 | bad_count: | |
448 | printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n"); | |
449 | return; | |
450 | +} | |
451 | + | |
452 | + | |
453 | +/* | |
454 | + * For brw_kiovec: submit the nr buffer_heads in bh[] (each covering 'size' bytes) through ll_rw_block, wait for every one of them, then release them. | |
455 | + * Returns the byte count of the leading run of buffers that completed uptodate, or -EIO when even the first buffer failed. | |
456 | + */ | |
457 | + | |
458 | +#define dprintk(x...) | |
459 | + | |
460 | +static int do_kio(int rw, int nr, struct buffer_head *bh[], int size) | |
461 | +{ | |
462 | + int iosize; | |
463 | + int i; | |
464 | + int err; | |
465 | + struct buffer_head *tmp; | |
466 | + | |
467 | + dprintk ("do_kio start\n"); | |
468 | + | |
469 | + ll_rw_block(rw, nr, bh); | |
470 | + iosize = err = 0; | |
471 | + | |
472 | + for (i = nr; --i >= 0; ) { | |
473 | + tmp = bh[i]; | |
474 | + wait_on_buffer(tmp); | |
475 | + iosize += size; /* count this buffer first, so a failure below discards it along with everything after it */ | |
476 | + if (!buffer_uptodate(tmp)) { | |
477 | + err = -EIO; | |
478 | + /* We are waiting on bh'es in reverse order so | |
479 | + clearing iosize on error calculates the | |
480 | + amount of IO before the first error. */ | |
481 | + iosize = 0; | |
482 | + } | |
483 | + | |
484 | + put_unused_buffer_head(tmp); | |
485 | + } | |
486 | + wake_up(&buffer_wait); | |
487 | + | |
488 | + dprintk ("do_kio end %d %d\n", iosize, err); | |
489 | + | |
490 | + if (iosize) | |
491 | + return iosize; | |
492 | + else | |
493 | + return err; | |
494 | +} | |
495 | + | |
496 | +/* | |
497 | + * Start I/O on a physical range of kernel memory, defined by a vector | |
498 | + * of kiobuf structs (much like a user-space iovec list). | |
499 | + * | |
500 | + * IO is performed synchronously: every batch of buffer_heads is handed | |
501 | + * to do_kio(), which waits for completion before this function returns. | |
502 | + * | |
503 | + * It is up to the caller to make sure that there are enough blocks | |
504 | + * passed in to completely map the iobufs to disk. */ | |
505 | + | |
506 | +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], /* Read or write the pages of nr kiobufs to/from the blocks of 'dev' listed in b[], 'size' bytes per block. */ | |
507 | + kdev_t dev, unsigned long b[], int size, int bmap) /* Returns bytes transferred, or a negative errno if none were; 'bmap' is not referenced in the body. */ | |
508 | +{ | |
509 | + int err; | |
510 | + int length; | |
511 | + int transferred; | |
512 | + int i; | |
513 | + int bufind; | |
514 | + int pageind; | |
515 | + int bhind; | |
516 | + int offset; | |
517 | + unsigned long blocknr; | |
518 | + struct kiobuf * iobuf = NULL; | |
519 | + unsigned long page; | |
520 | + struct page * map; /* NOTE(review): assigned in the page loop but not otherwise used here */ | |
521 | + struct buffer_head *tmp, *bh[KIO_MAX_SECTORS]; | |
522 | + | |
523 | + /* | |
524 | + * First, do some alignment and validity checks | |
525 | + */ | |
526 | + for (i = 0; i < nr; i++) { | |
527 | + iobuf = iovec[i]; | |
528 | + if ((iobuf->offset & (size-1)) || | |
529 | + (iobuf->length & (size-1))) | |
530 | + return -EINVAL; | |
531 | + if (!iobuf->nr_pages) | |
532 | + panic("brw_kiovec: iobuf not initialised"); | |
533 | + } | |
534 | + | |
535 | + /* DEBUG */ | |
536 | +#if 0 | |
537 | + return iobuf->length; | |
538 | +#endif | |
539 | + dprintk ("brw_kiovec: start\n"); | |
540 | + | |
541 | + /* | |
542 | + * OK to walk down the iovec doing page IO on each page we find. | |
543 | + */ | |
544 | + bufind = bhind = transferred = err = 0; | |
545 | + for (i = 0; i < nr; i++) { | |
546 | + iobuf = iovec[i]; | |
547 | + offset = iobuf->offset; | |
548 | + length = iobuf->length; | |
549 | + dprintk ("iobuf %d %d %d\n", offset, length, size); | |
550 | + | |
551 | + for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { | |
552 | + page = iobuf->pagelist[pageind]; | |
553 | + map = iobuf->maplist[pageind]; | |
554 | + | |
555 | + while (length > 0) { | |
556 | + blocknr = b[bufind++]; /* next block number supplied by the caller */ | |
557 | + tmp = get_unused_buffer_head(0); | |
558 | + if (!tmp) { | |
559 | + err = -ENOMEM; | |
560 | + goto error; | |
561 | + } | |
562 | + | |
563 | + tmp->b_dev = B_FREE; | |
564 | + tmp->b_size = size; | |
565 | + tmp->b_data = (char *) (page + offset); | |
566 | + tmp->b_this_page = tmp; | |
567 | + | |
568 | + init_buffer(tmp, dev, blocknr, | |
569 | + end_buffer_io_sync, NULL); | |
570 | + if (rw == WRITE) { | |
571 | + set_bit(BH_Uptodate, &tmp->b_state); | |
572 | + set_bit(BH_Dirty, &tmp->b_state); | |
573 | + } | |
574 | + | |
575 | + dprintk ("buffer %d (%d) at %p\n", | |
576 | + bhind, tmp->b_blocknr, tmp->b_data); | |
577 | + bh[bhind++] = tmp; | |
578 | + length -= size; | |
579 | + offset += size; | |
580 | + | |
581 | + /* | |
582 | + * Start the IO once a full batch of KIO_MAX_SECTORS | |
583 | + * buffer_heads is queued; any remainder is submitted after the loops | |
584 | + */ | |
585 | + if (bhind >= KIO_MAX_SECTORS) { | |
586 | + err = do_kio(rw, bhind, bh, size); | |
587 | + if (err >= 0) | |
588 | + transferred += err; | |
589 | + else | |
590 | + goto finished; | |
591 | + bhind = 0; | |
592 | + } | |
593 | + | |
594 | + if (offset >= PAGE_SIZE) { /* this page is used up: continue with the next one from its start */ | |
595 | + offset = 0; | |
596 | + break; | |
597 | + } | |
598 | + } /* End of block loop */ | |
599 | + } /* End of page loop */ | |
600 | + } /* End of iovec loop */ | |
601 | + | |
602 | + /* Is there any IO still left to submit? */ | |
603 | + if (bhind) { | |
604 | + err = do_kio(rw, bhind, bh, size); | |
605 | + if (err >= 0) | |
606 | + transferred += err; | |
607 | + else | |
608 | + goto finished; | |
609 | + } | |
610 | + | |
611 | + finished: | |
612 | + dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err); | |
613 | + if (transferred) /* bytes already moved are reported in preference to a later error */ | |
614 | + return transferred; | |
615 | + return err; | |
616 | + | |
617 | + error: | |
618 | + /* We got an error allocating the bh'es. Just free the current | |
619 | + buffer_heads and exit. */ | |
620 | + for (i = 0; i < bhind; i++) | |
621 | + put_unused_buffer_head(bh[i]); | |
622 | + wake_up(&buffer_wait); | |
623 | + goto finished; | |
624 | } | |
625 | ||
626 | /* | |
627 | diff -Nru linux/fs/iobuf.c linux.new/fs/iobuf.c | |
628 | --- linux/fs/iobuf.c Thu Jan 1 01:00:00 1970 | |
629 | +++ linux.new/fs/iobuf.c Thu Jan 4 06:50:46 2001 | |
630 | @@ -0,0 +1,106 @@ | |
631 | +/* | |
632 | + * iobuf.c | |
633 | + * | |
634 | + * Keep track of the general-purpose IO-buffer structures used to track | |
635 | + * abstract kernel-space io buffers. | |
636 | + * | |
637 | + */ | |
638 | + | |
639 | +#include <linux/iobuf.h> | |
640 | +#include <linux/malloc.h> | |
641 | +#include <linux/slab.h> | |
642 | + | |
643 | +static kmem_cache_t *kiobuf_cachep; | |
644 | + | |
645 | +void __init kiobuf_init(void) | |
646 | +{ | |
647 | + /* Create the slab cache that alloc_kiovec() draws struct kiobuf objects from; failure here is fatal. */ | |
648 | + kiobuf_cachep = kmem_cache_create("kiobuf", sizeof(struct kiobuf), 0, | |
649 | + SLAB_HWCACHE_ALIGN, NULL, NULL); | |
650 | + | |
651 | + if (kiobuf_cachep == NULL) | |
652 | + panic("Cannot create kernel iobuf cache\n"); | |
653 | +} | |
654 | + | |
655 | +/* Allocate and initialise nr kiobufs into bufp[0..nr-1]. Returns 0, or -ENOMEM after freeing the ones already allocated. */ | |
656 | +int alloc_kiovec(int nr, struct kiobuf **bufp) | |
657 | +{ | |
658 | + int i; | |
659 | + struct kiobuf *iobuf; | |
660 | + | |
661 | + for (i = 0; i < nr; i++) { | |
662 | + iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL); | |
663 | + if (!iobuf) { | |
664 | + free_kiovec(i, bufp); /* bufp still points at slot 0, so exactly the i buffers set up so far are freed */ | |
665 | + return -ENOMEM; | |
666 | + } | |
667 | + | |
668 | + memset(iobuf, 0, sizeof(*iobuf)); | |
669 | + iobuf->array_len = KIO_STATIC_PAGES; | |
670 | + iobuf->pagelist = iobuf->page_array; /* start out on the arrays embedded in the kiobuf */ | |
671 | + iobuf->maplist = iobuf->map_array; | |
672 | + bufp[i] = iobuf; /* index instead of advancing bufp: the error path above needs the base pointer */ | |
673 | + } | |
674 | + | |
675 | + return 0; | |
676 | +} | |
677 | + | |
678 | +void free_kiovec(int nr, struct kiobuf **bufp) | |
679 | +{ | |
680 | + int idx = 0; | |
681 | + | |
682 | + /* Release bufp[0..nr-1]; page arrays larger than the embedded ones were kmalloc'ed and go first. */ | |
683 | + while (idx < nr) { | |
684 | + struct kiobuf *victim = bufp[idx++]; | |
685 | + if (victim->array_len > KIO_STATIC_PAGES) { | |
686 | + kfree(victim->pagelist); | |
687 | + kfree(victim->maplist); | |
688 | + } | |
689 | + kmem_cache_free(kiobuf_cachep, victim); | |
690 | + } | |
691 | +} | |
692 | + | |
693 | +int expand_kiobuf(struct kiobuf *iobuf, int wanted) /* Grow the page arrays of 'iobuf' to hold 'wanted' entries; returns 0 or -ENOMEM. */ | |
694 | +{ | |
695 | + unsigned long *new_pages; | |
696 | + struct page **new_maps; | |
697 | + | |
698 | + /* Nothing to do when the current arrays already hold 'wanted' entries. */ | |
699 | + if (iobuf->array_len >= wanted) | |
700 | + return 0; | |
701 | + | |
702 | + new_pages = kmalloc(wanted * sizeof(*new_pages), GFP_KERNEL); | |
703 | + if (new_pages == NULL) | |
704 | + return -ENOMEM; | |
705 | + | |
706 | + new_maps = kmalloc(wanted * sizeof(*new_maps), GFP_KERNEL); | |
707 | + if (new_maps == NULL) { | |
708 | + kfree(new_pages); | |
709 | + return -ENOMEM; | |
710 | + } | |
711 | + | |
712 | + /* Re-check after allocating, in case the arrays grew while we waited. */ | |
713 | + if (iobuf->array_len >= wanted) { | |
714 | + kfree(new_pages); | |
715 | + kfree(new_maps); | |
716 | + return 0; | |
717 | + } | |
718 | + | |
719 | + /* Carry the existing entries over into the larger arrays. */ | |
720 | + memcpy(new_pages, iobuf->pagelist, iobuf->array_len * sizeof(*new_pages)); | |
721 | + memcpy(new_maps, iobuf->maplist, iobuf->array_len * sizeof(*new_maps)); | |
722 | + | |
723 | + /* Only arrays from an earlier expansion are freed; the embedded ones live inside the kiobuf. */ | |
724 | + if (iobuf->array_len > KIO_STATIC_PAGES) { | |
725 | + kfree(iobuf->pagelist); | |
726 | + kfree(iobuf->maplist); | |
727 | + } | |
728 | + | |
729 | + /* Switch the kiobuf over to the new arrays. */ | |
730 | + iobuf->pagelist = new_pages; | |
731 | + iobuf->maplist = new_maps; | |
732 | + iobuf->array_len = wanted; | |
733 | + return 0; | |
734 | +} | |
735 | + | |
736 | + | |
737 | diff -Nru linux/include/linux/iobuf.h linux.new/include/linux/iobuf.h | |
738 | --- linux/include/linux/iobuf.h Thu Jan 1 01:00:00 1970 | |
739 | +++ linux.new/include/linux/iobuf.h Thu Jan 4 06:50:47 2001 | |
740 | @@ -0,0 +1,70 @@ | |
741 | +/* | |
742 | + * iobuf.h | |
743 | + * | |
744 | + * Defines the structures used to track abstract kernel-space io buffers. | |
745 | + * | |
746 | + */ | |
747 | + | |
748 | +#ifndef __LINUX_IOBUF_H | |
749 | +#define __LINUX_IOBUF_H | |
750 | + | |
751 | +#include <linux/mm.h> | |
752 | +#include <linux/init.h> | |
753 | + | |
754 | +/* | |
755 | + * The kiobuf structure describes a physical set of pages reserved | |
756 | + * locked for IO. The reference counts on each page will have been | |
757 | + * incremented, and the flags field will indicate whether or not we have | |
758 | + * pre-locked all of the pages for IO. | |
759 | + * | |
760 | + * kiobufs may be passed in arrays to form a kiovec, but we must | |
761 | + * preserve the property that no page is present more than once over the | |
762 | + * entire iovec. | |
763 | + */ | |
764 | + | |
765 | +#define KIO_MAX_ATOMIC_IO 64 /* in kb */ | |
766 | +#define KIO_MAX_ATOMIC_BYTES (64 * 1024) | |
767 | +#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10)) | |
768 | +#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2) | |
769 | + | |
770 | +struct kiobuf | |
771 | +{ | |
772 | + int nr_pages; /* Pages actually referenced */ | |
773 | + int array_len; /* Space in the allocated lists */ | |
774 | + int offset; /* Offset to start of valid data */ | |
775 | + int length; /* Number of valid bytes of data */ | |
776 | + | |
777 | + /* Keep separate track of the physical addresses and page | |
778 | + * structs involved. If we do IO to a memory-mapped device | |
779 | + * region, there won't necessarily be page structs defined for | |
780 | + * every address. */ | |
781 | + | |
782 | + unsigned long * pagelist; /* page addresses; points at page_array until expand_kiobuf() replaces it */ | |
783 | + struct page ** maplist; /* matching struct page pointers; points at map_array until expanded */ | |
784 | + | |
785 | + unsigned int locked : 1; /* If set, pages have been locked */ | |
786 | + | |
787 | + /* Always embed enough page slots (KIO_STATIC_PAGES entries) for 64k of IO */ | |
788 | + unsigned long page_array[KIO_STATIC_PAGES]; | |
789 | + struct page * map_array[KIO_STATIC_PAGES]; | |
790 | +}; | |
791 | + | |
792 | + | |
793 | +/* mm/memory.c */ | |
794 | + | |
795 | +int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len); | |
796 | +void unmap_kiobuf(struct kiobuf *iobuf); | |
797 | + | |
798 | +/* fs/iobuf.c */ | |
799 | + | |
800 | +void __init kiobuf_init(void); | |
801 | +int alloc_kiovec(int nr, struct kiobuf **); | |
802 | +void free_kiovec(int nr, struct kiobuf **); | |
803 | +int expand_kiobuf(struct kiobuf *, int); | |
804 | + | |
805 | +/* fs/buffer.c */ | |
806 | + | |
807 | +int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], | |
808 | + kdev_t dev, unsigned long b[], int size, int bmap); | |
809 | + | |
810 | +#endif /* __LINUX_IOBUF_H */ | |
811 | diff -Nru linux/include/linux/major.h linux.new/include/linux/major.h | |
812 | --- linux/include/linux/major.h Thu Jan 4 06:58:46 2001 | |
813 | +++ linux.new/include/linux/major.h Thu Jan 4 06:50:47 2001 | |
814 | @@ -131,6 +131,8 @@ | |
815 | #define IDE8_MAJOR 90 | |
816 | #define IDE9_MAJOR 91 | |
817 | ||
818 | +#define RAW_MAJOR 162 | |
819 | + | |
820 | #define UNIX98_PTY_MASTER_MAJOR 128 | |
821 | #define UNIX98_PTY_MAJOR_COUNT 8 | |
822 | #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT) | |
823 | diff -Nru linux/include/linux/raw.h linux.new/include/linux/raw.h | |
824 | --- linux/include/linux/raw.h Thu Jan 1 01:00:00 1970 | |
825 | +++ linux.new/include/linux/raw.h Thu Jan 4 06:50:47 2001 | |
826 | @@ -0,0 +1,23 @@ | |
827 | +#ifndef __LINUX_RAW_H | |
828 | +#define __LINUX_RAW_H | |
829 | + | |
830 | +#include <linux/types.h> | |
831 | + | |
832 | +#define RAW_SETBIND _IO( 0xac, 0 ) | |
833 | +#define RAW_GETBIND _IO( 0xac, 1 ) | |
834 | + | |
835 | +struct raw_config_request /* argument block for the RAW_SETBIND / RAW_GETBIND ioctls */ | |
836 | +{ | |
837 | + int raw_minor; /* raw device minor being bound or queried */ | |
838 | + __u64 block_major; /* major number of the bound block device */ | |
839 | + __u64 block_minor; /* minor number of the bound block device */ | |
840 | +}; | |
841 | + | |
842 | +#ifdef __KERNEL__ | |
843 | + | |
844 | +/* drivers/char/raw.c */ | |
845 | +extern void raw_init(void); | |
846 | + | |
847 | +#endif /* __KERNEL__ */ | |
848 | + | |
849 | +#endif /* __LINUX_RAW_H */ | |
850 | diff -Nru linux/init/main.c linux.new/init/main.c | |
851 | --- linux/init/main.c Thu Jan 4 06:58:46 2001 | |
852 | +++ linux.new/init/main.c Thu Jan 4 06:50:47 2001 | |
853 | @@ -23,6 +23,7 @@ | |
854 | #include <linux/smp_lock.h> | |
855 | #include <linux/blk.h> | |
856 | #include <linux/hdreg.h> | |
857 | +#include <linux/iobuf.h> | |
858 | #include <linux/init.h> | |
859 | ||
860 | #include <asm/io.h> | |
861 | @@ -1461,6 +1462,7 @@ | |
862 | #ifdef CONFIG_ARCH_S390 | |
863 | ccwcache_init(); | |
864 | #endif | |
865 | + kiobuf_init(); | |
866 | signals_init(); | |
867 | inode_init(); | |
868 | file_table_init(); | |
869 | diff -Nru linux/kernel/ksyms.c linux.new/kernel/ksyms.c | |
870 | --- linux/kernel/ksyms.c Thu Jan 4 06:58:46 2001 | |
871 | +++ linux.new/kernel/ksyms.c Thu Jan 4 06:50:47 2001 | |
872 | @@ -37,6 +37,7 @@ | |
873 | #include <linux/poll.h> | |
874 | #include <linux/mm.h> | |
875 | #include <linux/capability.h> | |
876 | +#include <linux/iobuf.h> | |
877 | ||
878 | #if defined(CONFIG_PROC_FS) | |
879 | #include <linux/proc_fs.h> | |
880 | @@ -266,6 +267,14 @@ | |
881 | EXPORT_SYMBOL(max_sectors); | |
882 | EXPORT_SYMBOL(max_segments); | |
883 | EXPORT_SYMBOL(max_readahead); | |
884 | + | |
885 | +/* kiobuf support */ | |
886 | +EXPORT_SYMBOL(map_user_kiobuf); | |
887 | +EXPORT_SYMBOL(unmap_kiobuf); | |
888 | +EXPORT_SYMBOL(alloc_kiovec); | |
889 | +EXPORT_SYMBOL(free_kiovec); | |
890 | +EXPORT_SYMBOL(expand_kiobuf); | |
891 | +EXPORT_SYMBOL(brw_kiovec); | |
892 | ||
893 | /* tty routines */ | |
894 | EXPORT_SYMBOL(tty_hangup); | |
895 | diff -Nru linux/mm/memory.c linux.new/mm/memory.c | |
896 | --- linux/mm/memory.c Thu Jan 4 06:58:46 2001 | |
897 | +++ linux.new/mm/memory.c Thu Jan 4 06:50:47 2001 | |
898 | @@ -37,6 +37,8 @@ | |
899 | #include <linux/mman.h> | |
900 | #include <linux/swap.h> | |
901 | #include <linux/smp_lock.h> | |
902 | +#include <linux/pagemap.h> | |
903 | +#include <linux/iobuf.h> | |
904 | ||
905 | #include <asm/uaccess.h> | |
906 | #include <asm/pgtable.h> | |
907 | @@ -395,6 +397,220 @@ | |
908 | if (mm->rss < 0) | |
909 | mm->rss = 0; | |
910 | } | |
911 | +} | |
912 | + | |
913 | + | |
914 | +/* | |
915 | + * Do a quick page-table lookup for a single page: returns pte_page() of the mapping of user address 'address' in current->mm, or 0 if no page table or present pte exists for it or (when 'write' is set) the pte is not both writable and dirty. | |
916 | + */ | |
917 | +static unsigned long get_page(unsigned long address, int write) | |
918 | +{ | |
919 | + pgd_t *pgd = pgd_offset(current->mm, address); | |
920 | + pmd_t *pmd; | |
921 | + | |
922 | + if (pgd_none(*pgd)) /* empty top-level entry: there is nothing below it to walk */ | |
923 | + return 0; | |
924 | + pmd = pmd_offset(pgd, address); | |
925 | + if (!pmd_none(*pmd)) { /* only index a pte table that actually exists */ | |
926 | + pte_t * pte = pte_offset(pmd, address); | |
927 | + if (pte_present(*pte)) { | |
928 | + if (!write || | |
929 | + (pte_write(*pte) && pte_dirty(*pte))) | |
930 | + return pte_page(*pte); | |
931 | + } | |
932 | + } | |
933 | + return 0; | |
934 | +} | |
935 | + | |
936 | +/* | |
937 | + * Given a physical address, is there a useful struct page pointing to it? | |
938 | + */ | |
939 | + | |
940 | +static struct page * get_page_map(unsigned long page) | |
941 | +{ | |
942 | + struct page *desc; | |
943 | + | |
944 | + /* No usable descriptor for an index at or above max_mapnr, nor for the zero page. */ | |
945 | + if (MAP_NR(page) >= max_mapnr || page == ZERO_PAGE(page)) | |
946 | + return 0; | |
947 | + | |
948 | + desc = &mem_map[MAP_NR(page)]; | |
949 | + | |
950 | + /* Reserved pages are reported as having no map as well. */ | |
951 | + return PageReserved(desc) ? 0 : desc; | |
952 | +} | |
953 | + | |
954 | +/* | |
955 | + * Force in an entire range of pages from the current process's user VA, and pin (and, when the IO writes to memory, lock) the pages for IO. | |
956 | + * Returns 0 on success or a negative errno (-EINVAL, -EFAULT, -EACCES, -ENOMEM, -EAGAIN, or whatever expand_kiobuf reports); on failure nothing stays mapped in the kiobuf. | |
957 | + */ | |
958 | + | |
959 | +#define dprintk(x...) | |
960 | +int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) | |
961 | +{ | |
962 | + unsigned long ptr, end; | |
963 | + int err; | |
964 | + struct mm_struct * mm; | |
965 | + struct vm_area_struct * vma = 0; | |
966 | + unsigned long page; | |
967 | + struct page * map; | |
968 | + int doublepage = 0; | |
969 | + int repeat = 0; | |
970 | + int i; | |
971 | + /* if we read from disk it means we write to memory */ | |
972 | + int writemem = (rw == READ); | |
973 | + | |
974 | + /* Make sure the iobuf is not already mapped somewhere. */ | |
975 | + if (iobuf->nr_pages) | |
976 | + return -EINVAL; | |
977 | + | |
978 | + mm = current->mm; | |
979 | + dprintk ("map_user_kiobuf: begin\n"); | |
980 | + | |
981 | + ptr = va & PAGE_MASK; | |
982 | + end = (va + len + PAGE_SIZE - 1) & PAGE_MASK; | |
983 | + err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT); | |
984 | + if (err) | |
985 | + return err; | |
986 | + | |
987 | + repeat: | |
988 | + down(&mm->mmap_sem); | |
989 | + | |
990 | + err = -EFAULT; | |
991 | + iobuf->locked = writemem; | |
992 | + iobuf->offset = va & ~PAGE_MASK; | |
993 | + iobuf->length = len; | |
994 | + | |
995 | + i = 0; | |
996 | + | |
997 | + /* | |
998 | + * First of all, try to fault in all of the necessary pages | |
999 | + */ | |
1000 | + while (ptr < end) { | |
1001 | + if (!vma || ptr >= vma->vm_end) { | |
1002 | + vma = find_vma(mm, ptr); | |
1003 | + if (!vma) | |
1004 | + goto out; | |
1005 | + if (vma->vm_start > ptr) { | |
1006 | + if (!(vma->vm_flags & VM_GROWSDOWN)) | |
1007 | + goto out; | |
1008 | + if (expand_stack(vma, ptr)) | |
1009 | + goto out; | |
1010 | + } | |
1011 | + err = -EACCES; | |
1012 | + if (writemem) { | |
1013 | + if (!(vma->vm_flags & VM_WRITE)) | |
1014 | + goto out; | |
1015 | + } else { | |
1016 | + if (!(vma->vm_flags & VM_READ)) | |
1017 | + goto out; | |
1018 | + } | |
1019 | + err = -EFAULT; | |
1020 | + } | |
1021 | + while (!(page = get_page(ptr, writemem))) { | |
1022 | + int ret; | |
1023 | + | |
1024 | + ret = handle_mm_fault(current, vma, ptr, writemem); | |
1025 | + if (ret <= 0) { /* 0 leaves err at -EFAULT; negative is reported as -ENOMEM */ | |
1026 | + if (!ret) | |
1027 | + goto out; | |
1028 | + else { | |
1029 | + err = -ENOMEM; | |
1030 | + goto out; | |
1031 | + } | |
1032 | + } | |
1033 | + } | |
1034 | + map = get_page_map(page); | |
1035 | + if (map) { | |
1036 | + if (writemem) { | |
1037 | + /* | |
1038 | + * Lock down the pages only if we're going | |
1039 | + * to write to memory. If we're reading | |
1040 | + * from memory we're free to go ahead | |
1041 | + * only after pinning the page on the | |
1042 | + * physical side. | |
1043 | + */ | |
1044 | + if (PageLocked(map)) | |
1045 | + goto retry; | |
1046 | + set_bit(PG_locked, &map->flags); | |
1047 | + } | |
1048 | + flush_dcache_page(page_address(map)); | |
1049 | + atomic_inc(&map->count); | |
1050 | + } | |
1051 | + dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); | |
1052 | + iobuf->pagelist[i] = page; | |
1053 | + iobuf->maplist[i] = map; | |
1054 | + iobuf->nr_pages = ++i; /* kept current so unmap_kiobuf() undoes exactly what is installed */ | |
1055 | + | |
1056 | + ptr += PAGE_SIZE; | |
1057 | + } | |
1058 | + | |
1059 | + up(&mm->mmap_sem); | |
1060 | + dprintk ("map_user_kiobuf: end OK\n"); | |
1061 | + return 0; | |
1062 | + | |
1063 | + out: | |
1064 | + up(&mm->mmap_sem); | |
1065 | + unmap_kiobuf(iobuf); | |
1066 | + dprintk ("map_user_kiobuf: end %d\n", err); | |
1067 | + return err; | |
1068 | + | |
1069 | + retry: | |
1070 | + | |
1071 | + /* | |
1072 | + * Undo the locking so far, wait on the page we got to, and try again. | |
1073 | + */ | |
1074 | + unmap_kiobuf(iobuf); | |
1075 | + up(&mm->mmap_sem); | |
1076 | + ptr = va & PAGE_MASK; | |
1077 | + vma = 0; /* ptr was rewound and mmap_sem dropped: the cached vma is stale, force a fresh find_vma() and permission check */ | |
1078 | + /* | |
1079 | + * Did the release also unlock the page we got stuck on? | |
1080 | + */ | |
1081 | + if (!PageLocked(map)) { | |
1082 | + /* If so, we may well have the page mapped twice in the | |
1083 | + * IO address range. Bad news. Of course, it _might_ | |
1084 | + * just be a coincidence, but if it happens more than | |
1085 | + * once, chances are we have a double-mapped page. */ | |
1086 | + if (++doublepage >= 3) { | |
1087 | + return -EINVAL; | |
1088 | + } | |
1089 | + } | |
1090 | + | |
1091 | + /* | |
1092 | + * Try again... | |
1093 | + */ | |
1094 | + wait_on_page(map); | |
1095 | + if (++repeat < 16) | |
1096 | + goto repeat; | |
1097 | + return -EAGAIN; | |
1098 | +} | |
1099 | + | |
1100 | + | |
1101 | +/* | |
1102 | + * Unmap all of the pages referenced by a kiobuf. We release the pages, | |
1103 | + * and unlock them if they were locked. Afterwards the kiobuf reports no pages and no lock. | |
1104 | + */ | |
1105 | + | |
1106 | +void unmap_kiobuf (struct kiobuf *iobuf) | |
1107 | +{ | |
1108 | + int i; | |
1109 | + struct page *map; | |
1110 | + | |
1111 | + for (i = 0; i < iobuf->nr_pages; i++) { | |
1112 | + map = iobuf->maplist[i]; | |
1113 | + /* a slot with no struct page (0) has nothing to unlock or release */ | |
1114 | + if (map) { | |
1115 | + if (iobuf->locked) { | |
1116 | + clear_bit(PG_locked, &map->flags); | |
1117 | + wake_up(&map->wait); /* wake anything queued on this page's wait queue */ | |
1118 | + } | |
1119 | + __free_page(map); /* presumably drops the count reference taken when the page was mapped — confirm */ | |
1120 | + } | |
1121 | + } | |
1122 | + | |
1123 | + iobuf->nr_pages = 0; /* a non-zero nr_pages marks the kiobuf as still mapped */ | |
1124 | + iobuf->locked = 0; | |
1125 | } | |
1126 | ||
1127 | static inline void zeromap_pte_range(pte_t * pte, unsigned long address, |