diff -Nru linux/drivers/char/Makefile linux.new/drivers/char/Makefile
--- linux/drivers/char/Makefile Thu Jan  4 06:58:46 2001
+++ linux.new/drivers/char/Makefile     Thu Jan  4 06:50:46 2001
@@ -20,7 +20,7 @@
 
 O_TARGET := char.o
 M_OBJS   :=
-O_OBJS   := tty_io.o n_tty.o tty_ioctl.o mem.o random.o
+O_OBJS   := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o
 OX_OBJS  := pty.o misc.o
 obj-y   :=
 obj-m   :=
diff -Nru linux/drivers/char/mem.c linux.new/drivers/char/mem.c
--- linux/drivers/char/mem.c    Thu Jan  4 06:58:46 2001
+++ linux.new/drivers/char/mem.c        Thu Jan  4 06:59:49 2001
@@ -17,6 +17,7 @@
 #include <linux/joystick.h>
 #include <linux/i2c.h>
 #include <linux/capability.h>
+#include <linux/raw.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -608,6 +609,7 @@
        if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
                printk("unable to get major %d for memory devs\n", MEM_MAJOR);
        rand_initialize();
+       raw_init();
 #if defined (CONFIG_FB)
        fbmem_init();
 #endif
diff -Nru linux/drivers/char/raw.c linux.new/drivers/char/raw.c
--- linux/drivers/char/raw.c    Thu Jan  1 01:00:00 1970
+++ linux.new/drivers/char/raw.c        Thu Jan  4 06:50:46 2001
@@ -0,0 +1,387 @@
+/*
+ * linux/drivers/char/raw.c
+ *
+ * Front-end raw character devices.  These can be bound to any block
+ * device to provide genuine Unix raw character device semantics.
+ *
+ * We reserve minor number 0 for a control interface.  ioctl()s on this
+ * device are used to bind the other minor numbers to block devices.
+ */
+
+#include <linux/fs.h>
+#include <linux/iobuf.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/raw.h>
+#include <asm/uaccess.h>
+
+#define dprintk(x...)
+
+static kdev_t raw_device_bindings[256] = {};
+static int raw_device_inuse[256] = {};
+static int raw_device_sector_size[256] = {};
+static int raw_device_sector_bits[256] = {};
+
+extern struct file_operations * get_blkfops(unsigned int major);
+
+static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
+
+ssize_t        raw_read(struct file *, char *, size_t, loff_t *);
+ssize_t        raw_write(struct file *, const char *, size_t, loff_t *);
+int    raw_open(struct inode *, struct file *);
+int    raw_release(struct inode *, struct file *);
+int    raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+
+
+static struct file_operations raw_fops = {
+       NULL,           /* llseek */
+       raw_read,       /* read */
+       raw_write,      /* write */
+       NULL,           /* readdir */
+       NULL,           /* poll */
+       NULL,           /* ioctl */
+       NULL,           /* mmap */
+       raw_open,       /* open */
+       NULL,           /* flush */
+       raw_release,    /* release */
+       NULL            /* fsync */
+};
+
+static struct file_operations raw_ctl_fops = {
+       NULL,           /* llseek */
+       NULL,           /* read */
+       NULL,           /* write */
+       NULL,           /* readdir */
+       NULL,           /* poll */
+       raw_ctl_ioctl,  /* ioctl */
+       NULL,           /* mmap */
+       raw_open,       /* open */
+       NULL,           /* flush */
+       NULL,           /* no special release code */
+       NULL            /* fsync */
+};
+
+
+
+void __init raw_init(void)
+{
+       register_chrdev(RAW_MAJOR, "raw", &raw_fops);
+}
+
+
+/*
+ * The raw IO open and release code needs to fake appropriate
+ * open/release calls to the underlying block devices.
+ */
+
+static int bdev_open(kdev_t dev, int mode)
+{
+       int err = 0;
+       struct file dummy_file = {};
+       struct dentry dummy_dentry = {};
+       struct inode * inode = get_empty_inode();
+
+       if (!inode)
+               return -ENOMEM;
+
+       dummy_file.f_op = get_blkfops(MAJOR(dev));
+       if (!dummy_file.f_op) {
+               err = -ENODEV;
+               goto done;
+       }
+
+       if (dummy_file.f_op->open) {
+               inode->i_rdev = dev;
+               dummy_dentry.d_inode = inode;
+               dummy_file.f_dentry = &dummy_dentry;
+               dummy_file.f_mode = mode;
+               err = dummy_file.f_op->open(inode, &dummy_file);
+       }
+
+ done:
+       iput(inode);
+       return err;
+}
+
+static int bdev_close(kdev_t dev)
+{
+       int err;
+       struct inode * inode = get_empty_inode();
+
+       if (!inode)
+               return -ENOMEM;
+
+       inode->i_rdev = dev;
+       err = blkdev_release(inode);
+       iput(inode);
+       return err;
+}
+
+
+
+/*
+ * Open/close code for raw IO.
+ */
+
+int raw_open(struct inode *inode, struct file *filp)
+{
+       int minor;
+       kdev_t bdev;
+       int err;
+       int sector_size;
+       int sector_bits;
+
+       minor = MINOR(inode->i_rdev);
+
+       /*
+        * Is it the control device?
+        */
+
+       if (minor == 0) {
+               filp->f_op = &raw_ctl_fops;
+               return 0;
+       }
+
+       /*
+        * No, it is a normal raw device.  All we need to do on open is
+        * to check that the device is bound, and force the underlying
+        * block device to a sector-size blocksize.
+        */
+
+       bdev = raw_device_bindings[minor];
+       if (bdev == NODEV)
+               return -ENODEV;
+
+       err = bdev_open(bdev, filp->f_mode);
+       if (err)
+               return err;
+
+       /*
+        * Don't change the blocksize if we already have users using
+        * this device
+        */
+
+       if (raw_device_inuse[minor]++)
+               return 0;
+
+       /*
+        * Don't interfere with mounted devices: we cannot safely set
+        * the blocksize on a device which is already mounted.
+        */
+
+       sector_size = 512;
+       if (lookup_vfsmnt(bdev) != NULL) {
+               if (blksize_size[MAJOR(bdev)])
+                       sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)];
+       } else {
+               if (hardsect_size[MAJOR(bdev)])
+                       sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)];
+       }
+
+       set_blocksize(bdev, sector_size);
+       raw_device_sector_size[minor] = sector_size;
+
+       for (sector_bits = 0; !(sector_size & 1); )
+               sector_size >>= 1, sector_bits++;
+       raw_device_sector_bits[minor] = sector_bits;
+
+       return 0;
+}
+
+int raw_release(struct inode *inode, struct file *filp)
+{
+       int minor;
+       kdev_t bdev;
+
+       minor = MINOR(inode->i_rdev);
+       bdev = raw_device_bindings[minor];
+       bdev_close(bdev);
+       raw_device_inuse[minor]--;
+       return 0;
+}
+
+
+
+/*
+ * Deal with ioctls against the raw-device control interface, to bind
+ * and unbind other raw devices.
+ */
+
+int raw_ctl_ioctl(struct inode *inode,
+                 struct file *filp,
+                 unsigned int command,
+                 unsigned long arg)
+{
+       struct raw_config_request rq;
+       int err = 0;
+       int minor;
+
+       switch (command) {
+       case RAW_SETBIND:
+       case RAW_GETBIND:
+
+               /* First, find out which raw minor we want */
+
+               err = copy_from_user(&rq, (void *) arg, sizeof(rq)) ? -EFAULT : 0;
+               if (err)
+                       break;
+
+               minor = rq.raw_minor;
+               if (minor == 0 || minor > MINORMASK) {
+                       err = -EINVAL;
+                       break;
+               }
+
+               if (command == RAW_SETBIND) {
+                       /*
+                        * For now, we don't need to check that the underlying
+                        * block device is present or not: we can do that when
+                        * the raw device is opened.  Just check that the
+                        * major/minor numbers make sense.
+                        */
+
+                       if (rq.block_major == NODEV ||
+                           rq.block_major > MAX_BLKDEV ||
+                           rq.block_minor > MINORMASK) {
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (raw_device_inuse[minor]) {
+                               err = -EBUSY;
+                               break;
+                       }
+                       raw_device_bindings[minor] =
+                               MKDEV(rq.block_major, rq.block_minor);
+               } else {
+                       rq.block_major = MAJOR(raw_device_bindings[minor]);
+                       rq.block_minor = MINOR(raw_device_bindings[minor]);
+                       err = copy_to_user((void *) arg, &rq, sizeof(rq)) ? -EFAULT : 0;
+               }
+               break;
+
+       default:
+               err = -EINVAL;
+       }
+
+       return err;
+}
+
+
+
+ssize_t        raw_read(struct file *filp, char * buf,
+                size_t size, loff_t *offp)
+{
+       return rw_raw_dev(READ, filp, buf, size, offp);
+}
+
+ssize_t        raw_write(struct file *filp, const char *buf,
+                 size_t size, loff_t *offp)
+{
+       return rw_raw_dev(WRITE, filp, (char *) buf, size, offp);
+}
+
+#define SECTOR_BITS 9
+#define SECTOR_SIZE (1U << SECTOR_BITS)
+#define SECTOR_MASK (SECTOR_SIZE - 1)
+
+ssize_t        rw_raw_dev(int rw, struct file *filp, char *buf,
+                  size_t size, loff_t *offp)
+{
+       struct kiobuf * iobuf;
+       int             err;
+       unsigned long   blocknr, blocks;
+       unsigned long   b[KIO_MAX_SECTORS];
+       size_t          transferred;
+       int             iosize;
+       int             i;
+       int             minor;
+       kdev_t          dev;
+       unsigned long   limit;
+
+       int             sector_size, sector_bits, sector_mask;
+       int             max_sectors;
+
+       /*
+        * First, a few checks on device size limits
+        */
+
+       minor = MINOR(filp->f_dentry->d_inode->i_rdev);
+       dev = raw_device_bindings[minor];
+       sector_size = raw_device_sector_size[minor];
+       sector_bits = raw_device_sector_bits[minor];
+       sector_mask = sector_size - 1;
+       max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
+
+       if (blk_size[MAJOR(dev)])
+               limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits;
+       else
+               limit = INT_MAX;
+       dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
+                MAJOR(dev), MINOR(dev), limit);
+
+       if ((*offp & sector_mask) || (size & sector_mask))
+               return -EINVAL;
+       if ((*offp >> sector_bits) >= limit) {
+               if (size)
+                       return -ENXIO;
+               return 0;
+       }
+
+       /*
+        * We'll just use one kiobuf
+        */
+
+       err = alloc_kiovec(1, &iobuf);
+       if (err)
+               return err;
+
+       /*
+        * Split the IO into KIO_MAX_SECTORS chunks, mapping and
+        * unmapping the single kiobuf as we go to perform each chunk of
+        * IO.
+        */
+
+       transferred = 0;
+       blocknr = *offp >> sector_bits;
+       while (size > 0) {
+               blocks = size >> sector_bits;
+               if (blocks > max_sectors)
+                       blocks = max_sectors;
+               if (blocks > limit - blocknr)
+                       blocks = limit - blocknr;
+               if (!blocks)
+                       break;
+
+               iosize = blocks << sector_bits;
+
+               err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
+               if (err)
+                       break;
+
+               for (i=0; i < blocks; i++)
+                       b[i] = blocknr++;
+
+               err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size, 0);
+
+               if (err >= 0) {
+                       transferred += err;
+                       size -= err;
+                       buf += err;
+               }
+
+               unmap_kiobuf(iobuf);
+
+               if (err != iosize)
+                       break;
+       }
+
+       free_kiovec(1, &iobuf);
+
+       if (transferred) {
+               *offp += transferred;
+               return transferred;
+       }
+
+       return err;
+}
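
For illustration, the intended userspace sequence against this driver is: bind
an unused raw minor to a block device through the minor-0 control device, then
do sector-aligned I/O on the bound minor.  A minimal sketch, assuming device
nodes created as "mknod /dev/rawctl c 162 0" and "mknod /dev/raw1 c 162 1",
and picking block device 8:1 purely as an example:

        #include <stdio.h>
        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/raw.h>  /* RAW_SETBIND, struct raw_config_request */

        int main(void)
        {
                struct raw_config_request rq;
                static char buf[512];   /* length must be sector-aligned */
                int ctl, raw;

                ctl = open("/dev/rawctl", O_RDWR);
                if (ctl < 0)
                        return 1;
                rq.raw_minor   = 1;     /* bind /dev/raw1 ...           */
                rq.block_major = 8;     /* ... to block dev 8:1         */
                rq.block_minor = 1;     /*     (an assumed example)     */
                if (ioctl(ctl, RAW_SETBIND, &rq) < 0)
                        return 1;
                close(ctl);

                raw = open("/dev/raw1", O_RDONLY);
                if (raw < 0)
                        return 1;
                /* rw_raw_dev() rejects offsets and sizes that are not
                 * multiples of the bound device's sector size. */
                if (read(raw, buf, sizeof(buf)) < 0)
                        perror("read");
                close(raw);
                return 0;
        }
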
diff -Nru linux/fs/Makefile linux.new/fs/Makefile
--- linux/fs/Makefile   Thu Jan  4 06:58:46 2001
+++ linux.new/fs/Makefile       Thu Jan  4 06:50:46 2001
@@ -13,7 +13,7 @@
 O_OBJS    = open.o read_write.o devices.o file_table.o buffer.o \
                super.o  block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
                ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \
-               dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS)
+               dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS)
 
 MOD_LIST_NAME := FS_MODULES
 ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \
diff -Nru linux/fs/buffer.c linux.new/fs/buffer.c
--- linux/fs/buffer.c   Thu Jan  4 06:58:46 2001
+++ linux.new/fs/buffer.c       Thu Jan  4 06:50:46 2001
@@ -43,6 +43,7 @@
 #include <linux/file.h>
 #include <linux/init.h>
 #include <linux/quotaops.h>
+#include <linux/iobuf.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1259,6 +1260,180 @@
 bad_count:
        printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n");
        return;
+}
+
+
+/*
+ * For brw_kiovec: submit a set of buffer_head temporary IOs and wait
+ * for them to complete.  Clean up the buffer_heads afterwards.
+ */
+
+#define dprintk(x...)
+
+static int do_kio(int rw, int nr, struct buffer_head *bh[], int size)
+{
+       int iosize;
+       int i;
+       int err;
+       struct buffer_head *tmp;
+
+       dprintk ("do_kio start\n");
+
+       ll_rw_block(rw, nr, bh);
+       iosize = err = 0;
+
+       for (i = nr; --i >= 0; ) {
+               tmp = bh[i];
+               wait_on_buffer(tmp);
+               if (!buffer_uptodate(tmp)) {
+                       err = -EIO;
+                       /* We are waiting on bh'es in reverse order, so
+                          clearing iosize on error leaves it equal to the
+                          amount of IO done before the first error. */
+                       iosize = 0;
+               }
+
+               put_unused_buffer_head(tmp);
+               iosize += size;
+       }
+       wake_up(&buffer_wait);
+
+       dprintk ("do_kio end %d %d\n", iosize, err);
+
+       if (iosize)
+               return iosize;
+       else
+               return err;
+}
+
+/*
+ * Start I/O on a physical range of kernel memory, defined by a vector
+ * of kiobuf structs (much like a user-space iovec list).
+ *
+ * IO is submitted asynchronously: you need to check page->locked,
+ * page->uptodate, and maybe wait on page->wait.
+ *
+ * It is up to the caller to make sure that there are enough blocks
+ * passed in to completely map the iobufs to disk.  */
+
+int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
+              kdev_t dev, unsigned long b[], int size, int bmap)
+{
+       int             err;
+       int             length;
+       int             transferred;
+       int             i;
+       int             bufind;
+       int             pageind;
+       int             bhind;
+       int             offset;
+       unsigned long   blocknr;
+       struct kiobuf * iobuf = NULL;
+       unsigned long   page;
+       struct page *   map;
+       struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
+
+       /*
+        * First, do some alignment and validity checks
+        */
+       for (i = 0; i < nr; i++) {
+               iobuf = iovec[i];
+               if ((iobuf->offset & (size-1)) ||
+                   (iobuf->length & (size-1)))
+                       return -EINVAL;
+               if (!iobuf->nr_pages)
+                       panic("brw_kiovec: iobuf not initialised");
+       }
+
+       /* DEBUG */
+#if 0
+       return iobuf->length;
+#endif
+       dprintk ("brw_kiovec: start\n");
+
+       /*
+        * OK to walk down the iovec doing page IO on each page we find.
+        */
+       bufind = bhind = transferred = err = 0;
+       for (i = 0; i < nr; i++) {
+               iobuf = iovec[i];
+               offset = iobuf->offset;
+               length = iobuf->length;
+               dprintk ("iobuf %d %d %d\n", offset, length, size);
+
+               for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
+                       page = iobuf->pagelist[pageind];
+                       map  = iobuf->maplist[pageind];
+
+                       while (length > 0) {
+                               blocknr = b[bufind++];
+                               tmp = get_unused_buffer_head(0);
+                               if (!tmp) {
+                                       err = -ENOMEM;
+                                       goto error;
+                               }
+
+                               tmp->b_dev = B_FREE;
+                               tmp->b_size = size;
+                               tmp->b_data = (char *) (page + offset);
+                               tmp->b_this_page = tmp;
+
+                               init_buffer(tmp, dev, blocknr,
+                                           end_buffer_io_sync, NULL);
+                               if (rw == WRITE) {
+                                       set_bit(BH_Uptodate, &tmp->b_state);
+                                       set_bit(BH_Dirty, &tmp->b_state);
+                               }
+
+                               dprintk ("buffer %d (%d) at %p\n",
+                                        bhind, tmp->b_blocknr, tmp->b_data);
+                               bh[bhind++] = tmp;
+                               length -= size;
+                               offset += size;
+
+                               /*
+                                * Start the IO if we have got too much or if
+                                * this is the end of the last iobuf
+                                */
+                               if (bhind >= KIO_MAX_SECTORS) {
+                                       err = do_kio(rw, bhind, bh, size);
+                                       if (err >= 0)
+                                               transferred += err;
+                                       else
+                                               goto finished;
+                                       bhind = 0;
+                               }
+
+                               if (offset >= PAGE_SIZE) {
+                                       offset = 0;
+                                       break;
+                               }
+                       } /* End of block loop */
+               } /* End of page loop */
+       } /* End of iovec loop */
+
+       /* Is there any IO still left to submit? */
+       if (bhind) {
+               err = do_kio(rw, bhind, bh, size);
+               if (err >= 0)
+                       transferred += err;
+               else
+                       goto finished;
+       }
+
+ finished:
+       dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err);
+       if (transferred)
+               return transferred;
+       return err;
+
+ error:
+       /* We got an error allocating the bh'es.  Just free the current
+          buffer_heads and exit. */
+       for (i = 0; i < bhind; i++)
+               put_unused_buffer_head(bh[i]);
+       wake_up(&buffer_wait);
+       goto finished;
 }
 
 /*
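
Condensed from rw_raw_dev() in drivers/char/raw.c above, the expected
kernel-side calling sequence around brw_kiovec() is roughly the following
sketch; rw, dev, buf, size and first_block stand in for values a caller would
supply, and a single chunk of at most KIO_MAX_SECTORS blocks is assumed:

        struct kiobuf *iobuf;
        unsigned long b[KIO_MAX_SECTORS];
        int i, err, blocks = size >> 9;         /* 512-byte blocks */

        err = alloc_kiovec(1, &iobuf);          /* one kiobuf is enough */
        if (err)
                return err;
        err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, size);
        if (!err) {
                for (i = 0; i < blocks; i++)    /* one block number per sector */
                        b[i] = first_block + i;
                err = brw_kiovec(rw, 1, &iobuf, dev, b, 512, 0);
                unmap_kiobuf(iobuf);            /* releases and unlocks pages */
        }
        free_kiovec(1, &iobuf);
        return err;                             /* bytes done, or -errno */
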
diff -Nru linux/fs/iobuf.c linux.new/fs/iobuf.c
--- linux/fs/iobuf.c    Thu Jan  1 01:00:00 1970
+++ linux.new/fs/iobuf.c        Thu Jan  4 06:50:46 2001
@@ -0,0 +1,106 @@
+/*
+ * iobuf.c
+ *
+ * Keep track of the general-purpose IO-buffer structures used to track
+ * abstract kernel-space io buffers.
+ *
+ */
+
+#include <linux/iobuf.h>
+#include <linux/malloc.h>
+#include <linux/slab.h>
+
+static kmem_cache_t *kiobuf_cachep;
+
+void __init kiobuf_init(void)
+{
+       kiobuf_cachep = kmem_cache_create("kiobuf",
+                                         sizeof(struct kiobuf),
+                                         0,
+                                         SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!kiobuf_cachep)
+               panic("Cannot create kernel iobuf cache\n");
+}
+
+
+int alloc_kiovec(int nr, struct kiobuf **bufp)
+{
+       int i;
+       struct kiobuf *iobuf;
+
+       for (i = 0; i < nr; i++) {
+               iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+               if (!iobuf) {
+                       free_kiovec(i, bufp);
+                       return -ENOMEM;
+               }
+
+               memset(iobuf, 0, sizeof(*iobuf));
+               iobuf->array_len = KIO_STATIC_PAGES;
+               iobuf->pagelist  = iobuf->page_array;
+               iobuf->maplist   = iobuf->map_array;
+               *bufp++ = iobuf;
+       }
+
+       return 0;
+}
+
+void free_kiovec(int nr, struct kiobuf **bufp)
+{
+       struct kiobuf *iobuf;
+       int i;
+
+       for (i = 0; i < nr; i++) {
+               iobuf = bufp[i];
+               if (iobuf->array_len > KIO_STATIC_PAGES) {
+                       kfree (iobuf->pagelist);
+                       kfree (iobuf->maplist);
+               }
+               kmem_cache_free(kiobuf_cachep, bufp[i]);
+       }
+}
+
+int expand_kiobuf(struct kiobuf *iobuf, int wanted)
+{
+       unsigned long * pagelist;
+       struct page ** maplist;
+
+       if (iobuf->array_len >= wanted)
+               return 0;
+
+       pagelist = (unsigned long *)
+               kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
+       if (!pagelist)
+               return -ENOMEM;
+
+       maplist = (struct page **)
+               kmalloc(wanted * sizeof(struct page *), GFP_KERNEL);
+       if (!maplist) {
+               kfree(pagelist);
+               return -ENOMEM;
+       }
+
+       /* Did it grow while we waited? */
+       if (iobuf->array_len >= wanted) {
+               kfree(pagelist);
+               kfree(maplist);
+               return 0;
+       }
+
+       memcpy (pagelist, iobuf->pagelist,
+               iobuf->array_len * sizeof(unsigned long));
+       memcpy (maplist,  iobuf->maplist,
+               iobuf->array_len * sizeof(struct page *));
+
+       if (iobuf->array_len > KIO_STATIC_PAGES) {
+               kfree (iobuf->pagelist);
+               kfree (iobuf->maplist);
+       }
+
+       iobuf->pagelist  = pagelist;
+       iobuf->maplist   = maplist;
+       iobuf->array_len = wanted;
+       return 0;
+}
+
+
diff -Nru linux/include/linux/iobuf.h linux.new/include/linux/iobuf.h
--- linux/include/linux/iobuf.h Thu Jan  1 01:00:00 1970
+++ linux.new/include/linux/iobuf.h     Thu Jan  4 06:50:47 2001
@@ -0,0 +1,70 @@
+/*
+ * iobuf.h
+ *
+ * Defines the structures used to track abstract kernel-space io buffers.
+ *
+ */
+
+#ifndef __LINUX_IOBUF_H
+#define __LINUX_IOBUF_H
+
+#include <linux/mm.h>
+#include <linux/init.h>
+
+/*
+ * The kiobuf structure describes a physical set of pages reserved and
+ * locked for IO.  The reference counts on each page will have been
+ * incremented, and the flags field will indicate whether or not we have
+ * pre-locked all of the pages for IO.
+ *
+ * kiobufs may be passed in arrays to form a kiovec, but we must
+ * preserve the property that no page is present more than once over the
+ * entire iovec.
+ */
+
+#define KIO_MAX_ATOMIC_IO      64 /* in kb */
+#define KIO_MAX_ATOMIC_BYTES   (64 * 1024)
+#define KIO_STATIC_PAGES       (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10))
+#define KIO_MAX_SECTORS        (KIO_MAX_ATOMIC_IO * 2)
+
+struct kiobuf
+{
+       int             nr_pages;       /* Pages actually referenced */
+       int             array_len;      /* Space in the allocated lists */
+       int             offset;         /* Offset to start of valid data */
+       int             length;         /* Number of valid bytes of data */
+
+       /* Keep separate track of the physical addresses and page
+        * structs involved.  If we do IO to a memory-mapped device
+        * region, there won't necessarily be page structs defined for
+        * every address. */
+
+       unsigned long * pagelist;
+       struct page **  maplist;
+
+       unsigned int    locked : 1;     /* If set, the pages have been locked */
+
+       /* Always embed enough struct pages for 64k of IO */
+       unsigned long   page_array[KIO_STATIC_PAGES];
+       struct page *   map_array[KIO_STATIC_PAGES];
+};
+
+
+/* mm/memory.c */
+
+int    map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
+void   unmap_kiobuf(struct kiobuf *iobuf);
+
+/* fs/iobuf.c */
+
+void __init kiobuf_init(void);
+int    alloc_kiovec(int nr, struct kiobuf **);
+void   free_kiovec(int nr, struct kiobuf **);
+int    expand_kiobuf(struct kiobuf *, int);
+
+/* fs/buffer.c */
+
+int    brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
+                  kdev_t dev, unsigned long b[], int size, int bmap);
+
+#endif /* __LINUX_IOBUF_H */
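
As a concrete check of the constants above, on a machine with 4 KB pages (so
PAGE_SIZE >> 10 == 4; other page sizes scale accordingly):

        KIO_MAX_ATOMIC_IO    = 64 kb per atomic transfer
        KIO_MAX_ATOMIC_BYTES = 64 * 1024 = 65536 bytes
        KIO_STATIC_PAGES     = 64 / 4    = 16 embedded page slots
        KIO_MAX_SECTORS      = 64 * 2    = 128 512-byte sectors
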
diff -Nru linux/include/linux/major.h linux.new/include/linux/major.h
--- linux/include/linux/major.h Thu Jan  4 06:58:46 2001
+++ linux.new/include/linux/major.h     Thu Jan  4 06:50:47 2001
@@ -131,6 +131,8 @@
 #define IDE8_MAJOR     90
 #define IDE9_MAJOR     91
 
+#define RAW_MAJOR      162
+
 #define UNIX98_PTY_MASTER_MAJOR        128
 #define UNIX98_PTY_MAJOR_COUNT 8
 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
diff -Nru linux/include/linux/raw.h linux.new/include/linux/raw.h
--- linux/include/linux/raw.h   Thu Jan  1 01:00:00 1970
+++ linux.new/include/linux/raw.h       Thu Jan  4 06:50:47 2001
@@ -0,0 +1,23 @@
+#ifndef __LINUX_RAW_H
+#define __LINUX_RAW_H
+
+#include <linux/types.h>
+
+#define RAW_SETBIND    _IO( 0xac, 0 )
+#define RAW_GETBIND    _IO( 0xac, 1 )
+
+struct raw_config_request
+{
+       int     raw_minor;
+       __u64   block_major;
+       __u64   block_minor;
+};
+
+#ifdef __KERNEL__
+
+/* drivers/char/raw.c */
+extern void raw_init(void);
+
+#endif /* __KERNEL__ */
+
+#endif /* __LINUX_RAW_H */
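
Reading a binding back goes through the same control device with RAW_GETBIND;
a sketch using the same headers as the earlier userspace example, and again
assuming a /dev/rawctl node for char major 162, minor 0:

        struct raw_config_request rq;
        int ctl = open("/dev/rawctl", O_RDWR);

        rq.raw_minor = 1;
        if (ctl >= 0 && ioctl(ctl, RAW_GETBIND, &rq) == 0)
                printf("raw1 is bound to block dev %lu:%lu\n",
                       (unsigned long) rq.block_major,   /* values fit in  */
                       (unsigned long) rq.block_minor);  /* unsigned long  */
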
diff -Nru linux/init/main.c linux.new/init/main.c
--- linux/init/main.c   Thu Jan  4 06:58:46 2001
+++ linux.new/init/main.c       Thu Jan  4 06:50:47 2001
@@ -23,6 +23,7 @@
 #include <linux/smp_lock.h>
 #include <linux/blk.h>
 #include <linux/hdreg.h>
+#include <linux/iobuf.h>
 #include <linux/init.h>
 
 #include <asm/io.h>
@@ -1461,6 +1462,7 @@
 #ifdef CONFIG_ARCH_S390
        ccwcache_init();
 #endif
+       kiobuf_init();
        signals_init();
        inode_init();
        file_table_init();
diff -Nru linux/kernel/ksyms.c linux.new/kernel/ksyms.c
--- linux/kernel/ksyms.c        Thu Jan  4 06:58:46 2001
+++ linux.new/kernel/ksyms.c    Thu Jan  4 06:50:47 2001
@@ -37,6 +37,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/capability.h>
+#include <linux/iobuf.h>
 
 #if defined(CONFIG_PROC_FS)
 #include <linux/proc_fs.h>
@@ -266,6 +267,14 @@
 EXPORT_SYMBOL(max_sectors);
 EXPORT_SYMBOL(max_segments);
 EXPORT_SYMBOL(max_readahead);
+
+/* kiobuf support */
+EXPORT_SYMBOL(map_user_kiobuf);
+EXPORT_SYMBOL(unmap_kiobuf);
+EXPORT_SYMBOL(alloc_kiovec);
+EXPORT_SYMBOL(free_kiovec);
+EXPORT_SYMBOL(expand_kiobuf);
+EXPORT_SYMBOL(brw_kiovec);
 
 /* tty routines */
 EXPORT_SYMBOL(tty_hangup);
diff -Nru linux/mm/memory.c linux.new/mm/memory.c
--- linux/mm/memory.c   Thu Jan  4 06:58:46 2001
+++ linux.new/mm/memory.c       Thu Jan  4 06:50:47 2001
@@ -37,6 +37,8 @@
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <linux/iobuf.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -395,6 +397,220 @@
                if (mm->rss < 0)
                        mm->rss = 0;
        }
+}
+
+
+/*
+ * Do a quick page-table lookup for a single page.
+ */
+static unsigned long get_page(unsigned long address, int write)
+{
+       pgd_t *pgd;
+       pmd_t *pmd;
+
+       pgd = pgd_offset(current->mm, address);
+       pmd = pmd_offset(pgd, address);
+       if (pmd) {
+               pte_t * pte = pte_offset(pmd, address);
+               if (pte && pte_present(*pte)) {
+                       if (!write ||
+                           (pte_write(*pte) && pte_dirty(*pte)))
+                               return pte_page(*pte);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Given a physical address, is there a useful struct page pointing to it?
+ */
+
+static struct page * get_page_map(unsigned long page)
+{
+       struct page *map;
+
+       if (MAP_NR(page) >= max_mapnr)
+               return 0;
+       if (page == ZERO_PAGE(page))
+               return 0;
+       map = mem_map + MAP_NR(page);
+       if (PageReserved(map))
+               return 0;
+       return map;
+}
+
+/*
+ * Force in an entire range of pages from the current process's user VA,
+ * and pin and lock the pages for IO.
+ */
+
+#define dprintk(x...)
+int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
+{
+       unsigned long           ptr, end;
+       int                     err;
+       struct mm_struct *      mm;
+       struct vm_area_struct * vma = 0;
+       unsigned long           page;
+       struct page *           map;
+       int                     doublepage = 0;
+       int                     repeat = 0;
+       int                     i;
+       /* if we read from disk it means we write to memory */
+       int                     writemem = (rw == READ);
+
+       /* Make sure the iobuf is not already mapped somewhere. */
+       if (iobuf->nr_pages)
+               return -EINVAL;
+
+       mm = current->mm;
+       dprintk ("map_user_kiobuf: begin\n");
+
+       ptr = va & PAGE_MASK;
+       end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
+       err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
+       if (err)
+               return err;
+
+ repeat:
+       down(&mm->mmap_sem);
+
+       err = -EFAULT;
+       iobuf->locked = writemem;
+       iobuf->offset = va & ~PAGE_MASK;
+       iobuf->length = len;
+
+       i = 0;
+
+       /*
+        * First of all, try to fault in all of the necessary pages
+        */
+       while (ptr < end) {
+               if (!vma || ptr >= vma->vm_end) {
+                       vma = find_vma(mm, ptr);
+                       if (!vma)
+                               goto out;
+                       if (vma->vm_start > ptr) {
+                               if (!(vma->vm_flags & VM_GROWSDOWN))
+                                       goto out;
+                               if (expand_stack(vma, ptr))
+                                       goto out;
+                       }
+                       err = -EACCES;
+                       if (writemem) {
+                               if (!(vma->vm_flags & VM_WRITE))
+                                       goto out;
+                       } else {
+                               if (!(vma->vm_flags & VM_READ))
+                                       goto out;
+                       }
+                       err = -EFAULT;
+               }
+               while (!(page = get_page(ptr, writemem))) {
+                       int ret;
+
+                       ret = handle_mm_fault(current, vma, ptr, writemem);
+                       if (ret <= 0) {
+                               if (!ret)
+                                       goto out;
+                               else {
+                                       err = -ENOMEM;
+                                       goto out;
+                               }
+                       }
+               }
+               map = get_page_map(page);
+               if (map) {
+                       if (writemem) {
+                               /*
+                                * Lock down the pages only if we're going
+                                * to write to memory.  If we're reading
+                                * from memory, we're free to go ahead
+                                * only after pinning the page on the
+                                * physical side.
+                                */
+                               if (PageLocked(map))
+                                       goto retry;
+                               set_bit(PG_locked, &map->flags);
+                       }
+                       flush_dcache_page(page_address(map));
+                       atomic_inc(&map->count);
+               }
+               dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
+               iobuf->pagelist[i] = page;
+               iobuf->maplist[i] = map;
+               iobuf->nr_pages = ++i;
+
+               ptr += PAGE_SIZE;
+       }
+
+       up(&mm->mmap_sem);
+       dprintk ("map_user_kiobuf: end OK\n");
+       return 0;
+
+ out:
+       up(&mm->mmap_sem);
+       unmap_kiobuf(iobuf);
+       dprintk ("map_user_kiobuf: end %d\n", err);
+       return err;
+
+ retry:
+
+       /*
+        * Undo the locking so far, wait on the page we got to, and try again.
+        */
+       unmap_kiobuf(iobuf);
+       up(&mm->mmap_sem);
+       ptr = va & PAGE_MASK;
+
+       /*
+        * Did the release also unlock the page we got stuck on?
+        */
+       if (!PageLocked(map)) {
+               /* If so, we may well have the page mapped twice in the
+                * IO address range.  Bad news.  Of course, it _might_
+                * just be a coincidence, but if it happens more than
+                * once, chances are we have a double-mapped page. */
+               if (++doublepage >= 3) {
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * Try again...
+        */
+       wait_on_page(map);
+       if (++repeat < 16)
+               goto repeat;
+       return -EAGAIN;
+}
+
+
+/*
+ * Unmap all of the pages referenced by a kiobuf.  We release the pages,
+ * and unlock them if they were locked.
+ */
+
+void unmap_kiobuf (struct kiobuf *iobuf)
+{
+       int i;
+       struct page *map;
+
+       for (i = 0; i < iobuf->nr_pages; i++) {
+               map = iobuf->maplist[i];
+
+               if (map) {
+                       if (iobuf->locked) {
+                               clear_bit(PG_locked, &map->flags);
+                               wake_up(&map->wait);
+                       }
+                       __free_page(map);
+               }
+       }
+
+       iobuf->nr_pages = 0;
+       iobuf->locked = 0;
 }
 
 static inline void zeromap_pte_range(pte_t * pte, unsigned long address,