linux-raw.patch — backport of raw character-device support for Linux 2.2:
adds drivers/char/raw.c (raw device bound to a block device via ioctl on
minor 0), fs/iobuf.c kiobuf allocation, brw_kiovec in fs/buffer.c, and
map_user_kiobuf/unmap_kiobuf in mm/memory.c, plus Makefile/header wiring.
e15b74d6 1diff -Nru linux/drivers/char/Makefile linux.new/drivers/char/Makefile
2--- linux/drivers/char/Makefile Thu Jan 4 06:58:46 2001
3+++ linux.new/drivers/char/Makefile Thu Jan 4 06:50:46 2001
4@@ -20,7 +20,7 @@
5
6 O_TARGET := char.o
7 M_OBJS :=
8-O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o
9+O_OBJS := tty_io.o n_tty.o tty_ioctl.o mem.o random.o raw.o
10 OX_OBJS := pty.o misc.o
11 obj-y :=
12 obj-m :=
13diff -Nru linux/drivers/char/mem.c linux.new/drivers/char/mem.c
14--- linux/drivers/char/mem.c Thu Jan 4 06:58:46 2001
15+++ linux.new/drivers/char/mem.c Thu Jan 4 06:59:49 2001
16@@ -17,6 +17,7 @@
17 #include <linux/joystick.h>
18 #include <linux/i2c.h>
19 #include <linux/capability.h>
20+#include <linux/raw.h>
21
22 #include <asm/uaccess.h>
23 #include <asm/io.h>
24@@ -608,6 +609,7 @@
25 if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
26 printk("unable to get major %d for memory devs\n", MEM_MAJOR);
27 rand_initialize();
28+ raw_init();
29 #if defined (CONFIG_FB)
30 fbmem_init();
31 #endif
32diff -Nru linux/drivers/char/raw.c linux.new/drivers/char/raw.c
33--- linux/drivers/char/raw.c Thu Jan 1 01:00:00 1970
34+++ linux.new/drivers/char/raw.c Thu Jan 4 06:50:46 2001
35@@ -0,0 +1,387 @@
36+/*
37+ * linux/drivers/char/raw.c
38+ *
39+ * Front-end raw character devices. These can be bound to any block
40+ * devices to provide genuine Unix raw character device semantics.
41+ *
42+ * We reserve minor number 0 for a control interface. ioctl()s on this
43+ * device are used to bind the other minor numbers to block devices.
44+ */
45+
46+#include <linux/fs.h>
47+#include <linux/iobuf.h>
48+#include <linux/major.h>
49+#include <linux/blkdev.h>
50+#include <linux/raw.h>
51+#include <asm/uaccess.h>
52+
53+#define dprintk(x...)
54+
55+static kdev_t raw_device_bindings[256] = {};
56+static int raw_device_inuse[256] = {};
57+static int raw_device_sector_size[256] = {};
58+static int raw_device_sector_bits[256] = {};
59+
60+extern struct file_operations * get_blkfops(unsigned int major);
61+
62+static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
63+
64+ssize_t raw_read(struct file *, char *, size_t, loff_t *);
65+ssize_t raw_write(struct file *, const char *, size_t, loff_t *);
66+int raw_open(struct inode *, struct file *);
67+int raw_release(struct inode *, struct file *);
68+int raw_ctl_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
69+
70+
71+static struct file_operations raw_fops = {
72+ NULL, /* llseek */
73+ raw_read, /* read */
74+ raw_write, /* write */
75+ NULL, /* readdir */
76+ NULL, /* poll */
77+ NULL, /* ioctl */
78+ NULL, /* mmap */
79+ raw_open, /* open */
80+ NULL, /* flush */
81+ raw_release, /* release */
82+ NULL /* fsync */
83+};
84+
85+static struct file_operations raw_ctl_fops = {
86+ NULL, /* llseek */
87+ NULL, /* read */
88+ NULL, /* write */
89+ NULL, /* readdir */
90+ NULL, /* poll */
91+ raw_ctl_ioctl, /* ioctl */
92+ NULL, /* mmap */
93+ raw_open, /* open */
94+ NULL, /* flush */
95+ NULL, /* no special release code */
96+ NULL /* fsync */
97+};
98+
99+
100+
101+void __init raw_init(void)
102+{
103+ register_chrdev(RAW_MAJOR, "raw", &raw_fops);
104+}
105+
106+
107+/*
108+ * The raw IO open and release code needs to fake appropriate
109+ * open/release calls to the underlying block devices.
110+ */
111+
112+static int bdev_open(kdev_t dev, int mode)
113+{
114+ int err = 0;
115+ struct file dummy_file = {};
116+ struct dentry dummy_dentry = {};
117+ struct inode * inode = get_empty_inode();
118+
119+ if (!inode)
120+ return -ENOMEM;
121+
122+ dummy_file.f_op = get_blkfops(MAJOR(dev));
123+ if (!dummy_file.f_op) {
124+ err = -ENODEV;
125+ goto done;
126+ }
127+
128+ if (dummy_file.f_op->open) {
129+ inode->i_rdev = dev;
130+ dummy_dentry.d_inode = inode;
131+ dummy_file.f_dentry = &dummy_dentry;
132+ dummy_file.f_mode = mode;
133+ err = dummy_file.f_op->open(inode, &dummy_file);
134+ }
135+
136+ done:
137+ iput(inode);
138+ return err;
139+}
140+
141+static int bdev_close(kdev_t dev)
142+{
143+ int err;
144+ struct inode * inode = get_empty_inode();
145+
146+ if (!inode)
147+ return -ENOMEM;
148+
149+ inode->i_rdev = dev;
150+ err = blkdev_release(inode);
151+ iput(inode);
152+ return err;
153+}
154+
155+
156+
157+/*
158+ * Open/close code for raw IO.
159+ */
160+
161+int raw_open(struct inode *inode, struct file *filp)
162+{
163+ int minor;
164+ kdev_t bdev;
165+ int err;
166+ int sector_size;
167+ int sector_bits;
168+
169+ minor = MINOR(inode->i_rdev);
170+
171+ /*
172+ * Is it the control device?
173+ */
174+
175+ if (minor == 0) {
176+ filp->f_op = &raw_ctl_fops;
177+ return 0;
178+ }
179+
180+ /*
181+ * No, it is a normal raw device. All we need to do on open is
182+ * to check that the device is bound, and force the underlying
183+ * block device to a sector-size blocksize.
184+ */
185+
186+ bdev = raw_device_bindings[minor];
187+ if (bdev == NODEV)
188+ return -ENODEV;
189+
190+ err = bdev_open(bdev, filp->f_mode);
191+ if (err)
192+ return err;
193+
194+ /*
195+ * Don't change the blocksize if we already have users using
196+ * this device
197+ */
198+
199+ if (raw_device_inuse[minor]++)
200+ return 0;
201+
202+ /*
203+ * Don't interfere with mounted devices: we cannot safely set
204+ * the blocksize on a device which is already mounted.
205+ */
206+
207+ sector_size = 512;
208+ if (lookup_vfsmnt(bdev) != NULL) {
209+ if (blksize_size[MAJOR(bdev)])
210+ sector_size = blksize_size[MAJOR(bdev)][MINOR(bdev)];
211+ } else {
212+ if (hardsect_size[MAJOR(bdev)])
213+ sector_size = hardsect_size[MAJOR(bdev)][MINOR(bdev)];
214+ }
215+
216+ set_blocksize(bdev, sector_size);
217+ raw_device_sector_size[minor] = sector_size;
218+
219+ for (sector_bits = 0; !(sector_size & 1); )
220+ sector_size>>=1, sector_bits++;
221+ raw_device_sector_bits[minor] = sector_bits;
222+
223+ return 0;
224+}
225+
226+int raw_release(struct inode *inode, struct file *filp)
227+{
228+ int minor;
229+ kdev_t bdev;
230+
231+ minor = MINOR(inode->i_rdev);
232+ bdev = raw_device_bindings[minor];
233+ bdev_close(bdev);
234+ raw_device_inuse[minor]--;
235+ return 0;
236+}
237+
238+
239+
240+/*
241+ * Deal with ioctls against the raw-device control interface, to bind
242+ * and unbind other raw devices.
243+ */
244+
245+int raw_ctl_ioctl(struct inode *inode,
246+ struct file *flip,
247+ unsigned int command,
248+ unsigned long arg)
249+{
250+ struct raw_config_request rq;
251+ int err = 0;
252+ int minor;
253+
254+ switch (command) {
255+ case RAW_SETBIND:
256+ case RAW_GETBIND:
257+
258+ /* First, find out which raw minor we want */
259+
260+ err = copy_from_user(&rq, (void *) arg, sizeof(rq));
261+ if (err)
262+ break;
263+
264+ minor = rq.raw_minor;
265+ if (minor == 0 || minor > MINORMASK) {
266+ err = -EINVAL;
267+ break;
268+ }
269+
270+ if (command == RAW_SETBIND) {
271+ /*
272+ * For now, we don't need to check that the underlying
273+ * block device is present or not: we can do that when
274+ * the raw device is opened. Just check that the
275+ * major/minor numbers make sense.
276+ */
277+
278+ if (rq.block_major == NODEV ||
279+ rq.block_major > MAX_BLKDEV ||
280+ rq.block_minor > MINORMASK) {
281+ err = -EINVAL;
282+ break;
283+ }
284+
285+ if (raw_device_inuse[minor]) {
286+ err = -EBUSY;
287+ break;
288+ }
289+ raw_device_bindings[minor] =
290+ MKDEV(rq.block_major, rq.block_minor);
291+ } else {
292+ rq.block_major = MAJOR(raw_device_bindings[minor]);
293+ rq.block_minor = MINOR(raw_device_bindings[minor]);
294+ err = copy_to_user((void *) arg, &rq, sizeof(rq));
295+ }
296+ break;
297+
298+ default:
299+ err = -EINVAL;
300+ }
301+
302+ return err;
303+}
304+
305+
306+
307+ssize_t raw_read(struct file *filp, char * buf,
308+ size_t size, loff_t *offp)
309+{
310+ return rw_raw_dev(READ, filp, buf, size, offp);
311+}
312+
313+ssize_t raw_write(struct file *filp, const char *buf,
314+ size_t size, loff_t *offp)
315+{
316+ return rw_raw_dev(WRITE, filp, (char *) buf, size, offp);
317+}
318+
319+#define SECTOR_BITS 9
320+#define SECTOR_SIZE (1U << SECTOR_BITS)
321+#define SECTOR_MASK (SECTOR_SIZE - 1)
322+
323+ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
324+ size_t size, loff_t *offp)
325+{
326+ struct kiobuf * iobuf;
327+ int err;
328+ unsigned long blocknr, blocks;
329+ unsigned long b[KIO_MAX_SECTORS];
330+ size_t transferred;
331+ int iosize;
332+ int i;
333+ int minor;
334+ kdev_t dev;
335+ unsigned long limit;
336+
337+ int sector_size, sector_bits, sector_mask;
338+ int max_sectors;
339+
340+ /*
341+ * First, a few checks on device size limits
342+ */
343+
344+ minor = MINOR(filp->f_dentry->d_inode->i_rdev);
345+ dev = raw_device_bindings[minor];
346+ sector_size = raw_device_sector_size[minor];
347+ sector_bits = raw_device_sector_bits[minor];
348+ sector_mask = sector_size- 1;
349+ max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
350+
351+ if (blk_size[MAJOR(dev)])
352+ limit = (((loff_t) blk_size[MAJOR(dev)][MINOR(dev)]) << BLOCK_SIZE_BITS) >> sector_bits;
353+ else
354+ limit = INT_MAX;
355+ dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
356+ MAJOR(dev), MINOR(dev), limit);
357+
358+ if ((*offp & sector_mask) || (size & sector_mask))
359+ return -EINVAL;
360+ if ((*offp >> sector_bits) >= limit) {
361+ if (size)
362+ return -ENXIO;
363+ return 0;
364+ }
365+
366+ /*
367+ * We'll just use one kiobuf
368+ */
369+
370+ err = alloc_kiovec(1, &iobuf);
371+ if (err)
372+ return err;
373+
374+ /*
375+ * Split the IO into KIO_MAX_SECTORS chunks, mapping and
376+ * unmapping the single kiobuf as we go to perform each chunk of
377+ * IO.
378+ */
379+
380+ transferred = 0;
381+ blocknr = *offp >> sector_bits;
382+ while (size > 0) {
383+ blocks = size >> sector_bits;
384+ if (blocks > max_sectors)
385+ blocks = max_sectors;
386+ if (blocks > limit - blocknr)
387+ blocks = limit - blocknr;
388+ if (!blocks)
389+ break;
390+
391+ iosize = blocks << sector_bits;
392+
393+ err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
394+ if (err)
395+ break;
396+
397+ for (i=0; i < blocks; i++)
398+ b[i] = blocknr++;
399+
400+ err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size, 0);
401+
402+ if (err >= 0) {
403+ transferred += err;
404+ size -= err;
405+ buf += err;
406+ }
407+
408+ unmap_kiobuf(iobuf);
409+
410+ if (err != iosize)
411+ break;
412+ }
413+
414+ free_kiovec(1, &iobuf);
415+
416+ if (transferred) {
417+ *offp += transferred;
418+ return transferred;
419+ }
420+
421+ return err;
422+}
423diff -Nru linux/fs/Makefile linux.new/fs/Makefile
424--- linux/fs/Makefile Thu Jan 4 06:58:46 2001
425+++ linux.new/fs/Makefile Thu Jan 4 06:50:46 2001
426@@ -13,7 +13,7 @@
427 O_OBJS = open.o read_write.o devices.o file_table.o buffer.o \
428 super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
429 ioctl.o readdir.o select.o fifo.o locks.o filesystems.o \
430- dcache.o inode.o attr.o bad_inode.o file.o $(BINFMTS)
431+ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o $(BINFMTS)
432
433 MOD_LIST_NAME := FS_MODULES
434 ALL_SUB_DIRS = coda minix ext2 fat msdos vfat proc isofs nfs umsdos ntfs \
435diff -Nru linux/fs/buffer.c linux.new/fs/buffer.c
436--- linux/fs/buffer.c Thu Jan 4 06:58:46 2001
437+++ linux.new/fs/buffer.c Thu Jan 4 06:50:46 2001
438@@ -43,6 +43,7 @@
439 #include <linux/file.h>
440 #include <linux/init.h>
441 #include <linux/quotaops.h>
442+#include <linux/iobuf.h>
443
444 #include <asm/uaccess.h>
445 #include <asm/io.h>
446@@ -1259,6 +1260,180 @@
447 bad_count:
448 printk ("Whoops: end_buffer_io_async: b_count != 1 on async io.\n");
449 return;
450+}
451+
452+
453+/*
454+ * For brw_kiovec: submit a set of buffer_head temporary IOs and wait
455+ * for them to complete. Clean up the buffer_heads afterwards.
456+ */
457+
458+#define dprintk(x...)
459+
460+static int do_kio(int rw, int nr, struct buffer_head *bh[], int size)
461+{
462+ int iosize;
463+ int i;
464+ int err;
465+ struct buffer_head *tmp;
466+
467+ dprintk ("do_kio start\n");
468+
469+ ll_rw_block(rw, nr, bh);
470+ iosize = err = 0;
471+
472+ for (i = nr; --i >= 0; ) {
473+ tmp = bh[i];
474+ wait_on_buffer(tmp);
475+ if (!buffer_uptodate(tmp)) {
476+ err = -EIO;
477+ /* We are waiting on bh'es in reverse order so
478+ clearing iosize on error calculates the
479+ amount of IO before the first error. */
480+ iosize = 0;
481+ }
482+
483+ put_unused_buffer_head(tmp);
484+ iosize += size;
485+ }
486+ wake_up(&buffer_wait);
487+
488+ dprintk ("do_kio end %d %d\n", iosize, err);
489+
490+ if (iosize)
491+ return iosize;
492+ else
493+ return err;
494+}
495+
496+/*
497+ * Start I/O on a physical range of kernel memory, defined by a vector
498+ * of kiobuf structs (much like a user-space iovec list).
499+ *
500+ * IO is submitted asynchronously: you need to check page->locked,
501+ * page->uptodate, and maybe wait on page->wait.
502+ *
503+ * It is up to the caller to make sure that there are enough blocks
504+ * passed in to completely map the iobufs to disk. */
505+
506+int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
507+ kdev_t dev, unsigned long b[], int size, int bmap)
508+{
509+ int err;
510+ int length;
511+ int transferred;
512+ int i;
513+ int bufind;
514+ int pageind;
515+ int bhind;
516+ int offset;
517+ unsigned long blocknr;
518+ struct kiobuf * iobuf = NULL;
519+ unsigned long page;
520+ struct page * map;
521+ struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
522+
523+ /*
524+ * First, do some alignment and validity checks
525+ */
526+ for (i = 0; i < nr; i++) {
527+ iobuf = iovec[i];
528+ if ((iobuf->offset & (size-1)) ||
529+ (iobuf->length & (size-1)))
530+ return -EINVAL;
531+ if (!iobuf->nr_pages)
532+ panic("brw_kiovec: iobuf not initialised");
533+ }
534+
535+ /* DEBUG */
536+#if 0
537+ return iobuf->length;
538+#endif
539+ dprintk ("brw_kiovec: start\n");
540+
541+ /*
542+ * OK to walk down the iovec doing page IO on each page we find.
543+ */
544+ bufind = bhind = transferred = err = 0;
545+ for (i = 0; i < nr; i++) {
546+ iobuf = iovec[i];
547+ offset = iobuf->offset;
548+ length = iobuf->length;
549+ dprintk ("iobuf %d %d %d\n", offset, length, size);
550+
551+ for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
552+ page = iobuf->pagelist[pageind];
553+ map = iobuf->maplist[pageind];
554+
555+ while (length > 0) {
556+ blocknr = b[bufind++];
557+ tmp = get_unused_buffer_head(0);
558+ if (!tmp) {
559+ err = -ENOMEM;
560+ goto error;
561+ }
562+
563+ tmp->b_dev = B_FREE;
564+ tmp->b_size = size;
565+ tmp->b_data = (char *) (page + offset);
566+ tmp->b_this_page = tmp;
567+
568+ init_buffer(tmp, dev, blocknr,
569+ end_buffer_io_sync, NULL);
570+ if (rw == WRITE) {
571+ set_bit(BH_Uptodate, &tmp->b_state);
572+ set_bit(BH_Dirty, &tmp->b_state);
573+ }
574+
575+ dprintk ("buffer %d (%d) at %p\n",
576+ bhind, tmp->b_blocknr, tmp->b_data);
577+ bh[bhind++] = tmp;
578+ length -= size;
579+ offset += size;
580+
581+ /*
582+ * Start the IO if we have got too much or if
583+ * this is the end of the last iobuf
584+ */
585+ if (bhind >= KIO_MAX_SECTORS) {
586+ err = do_kio(rw, bhind, bh, size);
587+ if (err >= 0)
588+ transferred += err;
589+ else
590+ goto finished;
591+ bhind = 0;
592+ }
593+
594+ if (offset >= PAGE_SIZE) {
595+ offset = 0;
596+ break;
597+ }
598+ } /* End of block loop */
599+ } /* End of page loop */
600+ } /* End of iovec loop */
601+
602+ /* Is there any IO still left to submit? */
603+ if (bhind) {
604+ err = do_kio(rw, bhind, bh, size);
605+ if (err >= 0)
606+ transferred += err;
607+ else
608+ goto finished;
609+ }
610+
611+ finished:
612+ dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err);
613+ if (transferred)
614+ return transferred;
615+ return err;
616+
617+ error:
618+ /* We got an error allocation the bh'es. Just free the current
619+ buffer_heads and exit. */
620+ for (i = 0; i < bhind; i++)
621+ put_unused_buffer_head(bh[i]);
622+ wake_up(&buffer_wait);
623+ goto finished;
624 }
625
626 /*
627diff -Nru linux/fs/iobuf.c linux.new/fs/iobuf.c
628--- linux/fs/iobuf.c Thu Jan 1 01:00:00 1970
629+++ linux.new/fs/iobuf.c Thu Jan 4 06:50:46 2001
630@@ -0,0 +1,106 @@
631+/*
632+ * iobuf.c
633+ *
634+ * Keep track of the general-purpose IO-buffer structures used to track
635+ * abstract kernel-space io buffers.
636+ *
637+ */
638+
639+#include <linux/iobuf.h>
640+#include <linux/malloc.h>
641+#include <linux/slab.h>
642+
643+static kmem_cache_t *kiobuf_cachep;
644+
645+void __init kiobuf_init(void)
646+{
647+ kiobuf_cachep = kmem_cache_create("kiobuf",
648+ sizeof(struct kiobuf),
649+ 0,
650+ SLAB_HWCACHE_ALIGN, NULL, NULL);
651+ if(!kiobuf_cachep)
652+ panic("Cannot create kernel iobuf cache\n");
653+}
654+
655+
656+int alloc_kiovec(int nr, struct kiobuf **bufp)
657+{
658+ int i;
659+ struct kiobuf *iobuf;
660+
661+ for (i = 0; i < nr; i++) {
662+ iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
663+ if (!iobuf) {
664+ free_kiovec(i, bufp);
665+ return -ENOMEM;
666+ }
667+
668+ memset(iobuf, 0, sizeof(*iobuf));
669+ iobuf->array_len = KIO_STATIC_PAGES;
670+ iobuf->pagelist = iobuf->page_array;
671+ iobuf->maplist = iobuf->map_array;
672+ *bufp++ = iobuf;
673+ }
674+
675+ return 0;
676+}
677+
678+void free_kiovec(int nr, struct kiobuf **bufp)
679+{
680+ struct kiobuf *iobuf;
681+ int i;
682+
683+ for (i = 0; i < nr; i++) {
684+ iobuf = bufp[i];
685+ if (iobuf->array_len > KIO_STATIC_PAGES) {
686+ kfree (iobuf->pagelist);
687+ kfree (iobuf->maplist);
688+ }
689+ kmem_cache_free(kiobuf_cachep, bufp[i]);
690+ }
691+}
692+
693+int expand_kiobuf(struct kiobuf *iobuf, int wanted)
694+{
695+ unsigned long * pagelist;
696+ struct page ** maplist;
697+
698+ if (iobuf->array_len >= wanted)
699+ return 0;
700+
701+ pagelist = (unsigned long *)
702+ kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
703+ if (!pagelist)
704+ return -ENOMEM;
705+
706+ maplist = (struct page **)
707+ kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
708+ if (!maplist) {
709+ kfree(pagelist);
710+ return -ENOMEM;
711+ }
712+
713+ /* Did it grow while we waited? */
714+ if (iobuf->array_len >= wanted) {
715+ kfree(pagelist);
716+ kfree(maplist);
717+ return 0;
718+ }
719+
720+ memcpy (pagelist, iobuf->pagelist,
721+ iobuf->array_len * sizeof(unsigned long));
722+ memcpy (maplist, iobuf->maplist,
723+ iobuf->array_len * sizeof(struct page **));
724+
725+ if (iobuf->array_len > KIO_STATIC_PAGES) {
726+ kfree (iobuf->pagelist);
727+ kfree (iobuf->maplist);
728+ }
729+
730+ iobuf->pagelist = pagelist;
731+ iobuf->maplist = maplist;
732+ iobuf->array_len = wanted;
733+ return 0;
734+}
735+
736+
737diff -Nru linux/include/linux/iobuf.h linux.new/include/linux/iobuf.h
738--- linux/include/linux/iobuf.h Thu Jan 1 01:00:00 1970
739+++ linux.new/include/linux/iobuf.h Thu Jan 4 06:50:47 2001
740@@ -0,0 +1,70 @@
741+/*
742+ * iobuf.h
743+ *
744+ * Defines the structures used to track abstract kernel-space io buffers.
745+ *
746+ */
747+
748+#ifndef __LINUX_IOBUF_H
749+#define __LINUX_IOBUF_H
750+
751+#include <linux/mm.h>
752+#include <linux/init.h>
753+
754+/*
755+ * The kiobuf structure describes a physical set of pages reserved
756+ * locked for IO. The reference counts on each page will have been
757+ * incremented, and the flags field will indicate whether or not we have
758+ * pre-locked all of the pages for IO.
759+ *
760+ * kiobufs may be passed in arrays to form a kiovec, but we must
761+ * preserve the property that no page is present more than once over the
762+ * entire iovec.
763+ */
764+
765+#define KIO_MAX_ATOMIC_IO 64 /* in kb */
766+#define KIO_MAX_ATOMIC_BYTES (64 * 1024)
767+#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10))
768+#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
769+
770+struct kiobuf
771+{
772+ int nr_pages; /* Pages actually referenced */
773+ int array_len; /* Space in the allocated lists */
774+ int offset; /* Offset to start of valid data */
775+ int length; /* Number of valid bytes of data */
776+
777+ /* Keep separate track of the physical addresses and page
778+ * structs involved. If we do IO to a memory-mapped device
779+ * region, there won't necessarily be page structs defined for
780+ * every address. */
781+
782+ unsigned long * pagelist;
783+ struct page ** maplist;
784+
785+ unsigned int locked : 1; /* If set, pages has been locked */
786+
787+ /* Always embed enough struct pages for 64k of IO */
788+ unsigned long page_array[KIO_STATIC_PAGES];
789+ struct page * map_array[KIO_STATIC_PAGES];
790+};
791+
792+
793+/* mm/memory.c */
794+
795+int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
796+void unmap_kiobuf(struct kiobuf *iobuf);
797+
798+/* fs/iobuf.c */
799+
800+void __init kiobuf_init(void);
801+int alloc_kiovec(int nr, struct kiobuf **);
802+void free_kiovec(int nr, struct kiobuf **);
803+int expand_kiobuf(struct kiobuf *, int);
804+
805+/* fs/buffer.c */
806+
807+int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
808+ kdev_t dev, unsigned long b[], int size, int bmap);
809+
810+#endif /* __LINUX_IOBUF_H */
811diff -Nru linux/include/linux/major.h linux.new/include/linux/major.h
812--- linux/include/linux/major.h Thu Jan 4 06:58:46 2001
813+++ linux.new/include/linux/major.h Thu Jan 4 06:50:47 2001
814@@ -131,6 +131,8 @@
815 #define IDE8_MAJOR 90
816 #define IDE9_MAJOR 91
817
818+#define RAW_MAJOR 162
819+
820 #define UNIX98_PTY_MASTER_MAJOR 128
821 #define UNIX98_PTY_MAJOR_COUNT 8
822 #define UNIX98_PTY_SLAVE_MAJOR (UNIX98_PTY_MASTER_MAJOR+UNIX98_PTY_MAJOR_COUNT)
823diff -Nru linux/include/linux/raw.h linux.new/include/linux/raw.h
824--- linux/include/linux/raw.h Thu Jan 1 01:00:00 1970
825+++ linux.new/include/linux/raw.h Thu Jan 4 06:50:47 2001
826@@ -0,0 +1,23 @@
827+#ifndef __LINUX_RAW_H
828+#define __LINUX_RAW_H
829+
830+#include <linux/types.h>
831+
832+#define RAW_SETBIND _IO( 0xac, 0 )
833+#define RAW_GETBIND _IO( 0xac, 1 )
834+
835+struct raw_config_request
836+{
837+ int raw_minor;
838+ __u64 block_major;
839+ __u64 block_minor;
840+};
841+
842+#ifdef __KERNEL__
843+
844+/* drivers/char/raw.c */
845+extern void raw_init(void);
846+
847+#endif /* __KERNEL__ */
848+
849+#endif /* __LINUX_RAW_H */
850diff -Nru linux/init/main.c linux.new/init/main.c
851--- linux/init/main.c Thu Jan 4 06:58:46 2001
852+++ linux.new/init/main.c Thu Jan 4 06:50:47 2001
853@@ -23,6 +23,7 @@
854 #include <linux/smp_lock.h>
855 #include <linux/blk.h>
856 #include <linux/hdreg.h>
857+#include <linux/iobuf.h>
858 #include <linux/init.h>
859
860 #include <asm/io.h>
861@@ -1461,6 +1462,7 @@
862 #ifdef CONFIG_ARCH_S390
863 ccwcache_init();
864 #endif
865+ kiobuf_init();
866 signals_init();
867 inode_init();
868 file_table_init();
869diff -Nru linux/kernel/ksyms.c linux.new/kernel/ksyms.c
870--- linux/kernel/ksyms.c Thu Jan 4 06:58:46 2001
871+++ linux.new/kernel/ksyms.c Thu Jan 4 06:50:47 2001
872@@ -37,6 +37,7 @@
873 #include <linux/poll.h>
874 #include <linux/mm.h>
875 #include <linux/capability.h>
876+#include <linux/iobuf.h>
877
878 #if defined(CONFIG_PROC_FS)
879 #include <linux/proc_fs.h>
880@@ -266,6 +267,14 @@
881 EXPORT_SYMBOL(max_sectors);
882 EXPORT_SYMBOL(max_segments);
883 EXPORT_SYMBOL(max_readahead);
884+
885+/* kiobuf support */
886+EXPORT_SYMBOL(map_user_kiobuf);
887+EXPORT_SYMBOL(unmap_kiobuf);
888+EXPORT_SYMBOL(alloc_kiovec);
889+EXPORT_SYMBOL(free_kiovec);
890+EXPORT_SYMBOL(expand_kiobuf);
891+EXPORT_SYMBOL(brw_kiovec);
892
893 /* tty routines */
894 EXPORT_SYMBOL(tty_hangup);
895diff -Nru linux/mm/memory.c linux.new/mm/memory.c
896--- linux/mm/memory.c Thu Jan 4 06:58:46 2001
897+++ linux.new/mm/memory.c Thu Jan 4 06:50:47 2001
898@@ -37,6 +37,8 @@
899 #include <linux/mman.h>
900 #include <linux/swap.h>
901 #include <linux/smp_lock.h>
902+#include <linux/pagemap.h>
903+#include <linux/iobuf.h>
904
905 #include <asm/uaccess.h>
906 #include <asm/pgtable.h>
907@@ -395,6 +397,220 @@
908 if (mm->rss < 0)
909 mm->rss = 0;
910 }
911+}
912+
913+
914+/*
915+ * Do a quick page-table lookup for a single page.
916+ */
917+static unsigned long get_page(unsigned long address, int write)
918+{
919+ pgd_t *pgd;
920+ pmd_t *pmd;
921+
922+ pgd = pgd_offset(current->mm, address);
923+ pmd = pmd_offset(pgd, address);
924+ if (pmd) {
925+ pte_t * pte = pte_offset(pmd, address);
926+ if (pte && pte_present(*pte)) {
927+ if (!write ||
928+ (pte_write(*pte) && pte_dirty(*pte)))
929+ return pte_page(*pte);
930+ }
931+ }
932+
933+ return 0;
934+}
935+
936+/*
937+ * Given a physical address, is there a useful struct page pointing to it?
938+ */
939+
940+static struct page * get_page_map(unsigned long page)
941+{
942+ struct page *map;
943+
944+ if (MAP_NR(page) >= max_mapnr)
945+ return 0;
946+ if (page == ZERO_PAGE(page))
947+ return 0;
948+ map = mem_map + MAP_NR(page);
949+ if (PageReserved(map))
950+ return 0;
951+ return map;
952+}
953+
954+/*
955+ * Force in an entire range of pages from the current process's user VA,
956+ * and pin and lock the pages for IO.
957+ */
958+
959+#define dprintk(x...)
960+int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
961+{
962+ unsigned long ptr, end;
963+ int err;
964+ struct mm_struct * mm;
965+ struct vm_area_struct * vma = 0;
966+ unsigned long page;
967+ struct page * map;
968+ int doublepage = 0;
969+ int repeat = 0;
970+ int i;
971+ /* if we read from disk it means we write to memory */
972+ int writemem = (rw == READ);
973+
974+ /* Make sure the iobuf is not already mapped somewhere. */
975+ if (iobuf->nr_pages)
976+ return -EINVAL;
977+
978+ mm = current->mm;
979+ dprintk ("map_user_kiobuf: begin\n");
980+
981+ ptr = va & PAGE_MASK;
982+ end = (va + len + PAGE_SIZE - 1) & PAGE_MASK;
983+ err = expand_kiobuf(iobuf, (end - ptr) >> PAGE_SHIFT);
984+ if (err)
985+ return err;
986+
987+ repeat:
988+ down(&mm->mmap_sem);
989+
990+ err = -EFAULT;
991+ iobuf->locked = writemem;
992+ iobuf->offset = va & ~PAGE_MASK;
993+ iobuf->length = len;
994+
995+ i = 0;
996+
997+ /*
998+ * First of all, try to fault in all of the necessary pages
999+ */
1000+ while (ptr < end) {
1001+ if (!vma || ptr >= vma->vm_end) {
1002+ vma = find_vma(mm, ptr);
1003+ if (!vma)
1004+ goto out;
1005+ if (vma->vm_start > ptr) {
1006+ if (!(vma->vm_flags & VM_GROWSDOWN))
1007+ goto out;
1008+ if (expand_stack(vma, ptr))
1009+ goto out;
1010+ }
1011+ err = -EACCES;
1012+ if (writemem) {
1013+ if (!(vma->vm_flags & VM_WRITE))
1014+ goto out;
1015+ } else {
1016+ if (!(vma->vm_flags & VM_READ))
1017+ goto out;
1018+ }
1019+ err = -EFAULT;
1020+ }
1021+ while (!(page = get_page(ptr, writemem))) {
1022+ int ret;
1023+
1024+ ret = handle_mm_fault(current, vma, ptr, writemem);
1025+ if (ret <= 0) {
1026+ if (!ret)
1027+ goto out;
1028+ else {
1029+ err = -ENOMEM;
1030+ goto out;
1031+ }
1032+ }
1033+ }
1034+ map = get_page_map(page);
1035+ if (map) {
1036+ if (writemem) {
1037+ /*
1038+ * Lock down the pages only if we're going
1039+ * to write to memory. If if we're reading
1040+ * from memory we're free to go ahead
1041+ * only after pinning the page on the
1042+ * physical side.
1043+ */
1044+ if (PageLocked(map))
1045+ goto retry;
1046+ set_bit(PG_locked, &map->flags);
1047+ }
1048+ flush_dcache_page(page_address(map));
1049+ atomic_inc(&map->count);
1050+ }
1051+ dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
1052+ iobuf->pagelist[i] = page;
1053+ iobuf->maplist[i] = map;
1054+ iobuf->nr_pages = ++i;
1055+
1056+ ptr += PAGE_SIZE;
1057+ }
1058+
1059+ up(&mm->mmap_sem);
1060+ dprintk ("map_user_kiobuf: end OK\n");
1061+ return 0;
1062+
1063+ out:
1064+ up(&mm->mmap_sem);
1065+ unmap_kiobuf(iobuf);
1066+ dprintk ("map_user_kiobuf: end %d\n", err);
1067+ return err;
1068+
1069+ retry:
1070+
1071+ /*
1072+ * Undo the locking so far, wait on the page we got to, and try again.
1073+ */
1074+ unmap_kiobuf(iobuf);
1075+ up(&mm->mmap_sem);
1076+ ptr = va & PAGE_MASK;
1077+
1078+ /*
1079+ * Did the release also unlock the page we got stuck on?
1080+ */
1081+ if (!PageLocked(map)) {
1082+ /* If so, we may well have the page mapped twice in the
1083+ * IO address range. Bad news. Of course, it _might_
1084+ * just be a coincidence, but if it happens more than
1085+ * once, chances are we have a double-mapped page. */
1086+ if (++doublepage >= 3) {
1087+ return -EINVAL;
1088+ }
1089+ }
1090+
1091+ /*
1092+ * Try again...
1093+ */
1094+ wait_on_page(map);
1095+ if (++repeat < 16)
1096+ goto repeat;
1097+ return -EAGAIN;
1098+}
1099+
1100+
1101+/*
1102+ * Unmap all of the pages referenced by a kiobuf. We release the pages,
1103+ * and unlock them if they were locked.
1104+ */
1105+
1106+void unmap_kiobuf (struct kiobuf *iobuf)
1107+{
1108+ int i;
1109+ struct page *map;
1110+
1111+ for (i = 0; i < iobuf->nr_pages; i++) {
1112+ map = iobuf->maplist[i];
1113+
1114+ if (map) {
1115+ if (iobuf->locked) {
1116+ clear_bit(PG_locked, &map->flags);
1117+ wake_up(&map->wait);
1118+ }
1119+ __free_page(map);
1120+ }
1121+ }
1122+
1123+ iobuf->nr_pages = 0;
1124+ iobuf->locked = 0;
1125 }
1126
1127 static inline void zeromap_pte_range(pte_t * pte, unsigned long address,