1 --- linux-2.5.64/drivers/block/Kconfig.pre-enbd Mon Mar 24 15:56:52 2003
2 +++ linux-2.5.64/drivers/block/Kconfig Mon Mar 24 17:45:35 2003
8 + bool 'Enhanced network block device'
11 + To use the ENBD support, you must say Y here and select one
12 + of the driver's units (e.g. BLK_DEV_ENBD, BLK_DEV_ENBD_IOCTL).
14 +source "drivers/block/enbd/Kconfig"
17 tristate "RAM disk support"
19 --- linux-2.5.64/drivers/block/enbd/enbd_base.c.pre-enbd Mon Mar 24 18:55:25 2003
20 +++ linux-2.5.64/drivers/block/enbd/enbd_base.c Tue Mar 25 15:44:12 2003
23 + * (Enhanced) Network block device - make block devices work over TCP
25 + * Original NBD Copyright 1997 Pavel Machek <pavel@elf.mj.gts.cz>
26 + * Further ENBD Copyrights 1998, 1999, 2000 Peter Breuer <ptb@it.uc3m.es>
30 + * ATTENTION: You need the userspace daemons available from
31 + * ftp://oboe.it.uc3m.es/pub/Programs/nbd-2.4.*.tgz
32 + * and/or the ENBD project on http://freshmeat.net
36 + * Development of the ENBD software has been supported by grants and
37 + * contributions from Realm Information Technologies, Inc. of 5555
38 + * Oakbrook Parkway, NW Norcross, GA and iNsu Innovations Inc. of
39 + * 3465, Boulevard Thimens, Saint-Laurent, Quebec, Canada.
41 + * ------------ Pavel's history notes ----------------------------------
42 + * 97-3-25 compiled 0-th version, not yet tested it
43 + * (it did not work, BTW) (later that day) HEY! it works!
44 + * (bit later) hmm, not that much... 2:00am next day:
45 + * yes, it works, but it gives something like 50kB/sec
46 + * 97-3-28 it's completely strange - when using 1024 byte "packets"
47 + * it gives 50kB/sec and CPU idle; with 2048 bytes it gives
48 + * 500kB/sec (and CPU loaded 100% as it should be) (all done
49 + * against localhost)
50 + * 97-4-1 complete rewrite to make it possible for many requests at
51 + * once to be processed
52 + * 97-4-1 23:57 rewrite once again to make it work :-(
53 + * 97-4-3 00:02 hmm, it does not work.
54 + * 97-4-3 23:06 hmm, it will need one more rewrite :-)
55 + * 97-4-10 It looks like it's working and stable. But I still do not
56 + * have any recovery from lost connection...
57 + * (setq tab-width 4)
58 + * 97-4-11 Making protocol independent of endianity etc.
59 + * 97-4-15 Probably one more rewrite, since it loses requests under
61 + * 97-9-13 Cosmetic changes
63 + * possible FIXME: make set_sock / set_blksize / set_size / do_it one syscall
64 + * why not: would need verify_area and friends, would share yet another
65 + * structure with userland
67 + * FIXME: not module-safe
69 + * ------------ Peter's history notes ----------------------------------
70 + * 98-12-18 modules now OK ptb@it.uc3m.es (Peter Breuer) ported to
71 + * 2.0.*. + better debugging. Still possible lockup in connection with APM
72 + * and spurious interrupt - only on write. Error treatment should
73 + * be improved. After 100 errors from end_request the kernel can
74 + * do anything. We should catch it ourselves.
75 + * 99-1-sometime fixed lockup by extending semaphore - ptb v1.0.
76 + * 99-3-sometime reconnect protocol (client mod agreed by pavel) - ptb v1.1
77 + * 99-4-25 add /proc/nbdinfo - ptb v1.1.1
78 + * 99-4-sometime add multiplex - ptb v1.2
79 + * 99-4-26 fix multiplex and redundancy - ptb v1.2.1
80 + * 99-4-29 reentrant client threads - ptb v1.2.2
81 + * 99-4-29 socket related stuff placed in user space - amarin v1.3.0
82 + * 99-5-3 fix all, all writes had to be before all reads - ptb v1.2.4
83 + * 99-5-5 fix out-of-order, async - ptb v1.2.5
84 + * 99-5-7 semaphores removed (still works!), fail cases corrected - ptb v1.2.6
85 + * 99-5-12 signals unblocked in xmit, blksize != 1024 fixed, ioctls
86 + * added - ptb v1.2.7
87 + * 99-6-1 interaction with client split into two functions - amarin v1.3.0
88 + * 99-6-3 reintegrated fully, mem manager fixed, accounts fixed - ptb v1.2.8.3
89 + * 99-6-3 extra queue removed, mem manager removed - ptb v1.2.8.4
90 + * 99-7-3 buffer registration introduced - ptb v1.2.8.5
91 + * 99-7-3 some client redundancy reestablished - ptb v2.1.1
92 + * 99-7-10 encapsulated queue calls. One element rollback buffer - ptb v2.1.2
93 + * 99-7-20 timestamp and rollback old abandoned request - ptb v2.1.3
94 + * 99-7-24 64bit file sizes and offsets accepted - ptb v2.1.9
95 + * 99-7-26 experimental request coalesces - ptb v2.1.10
96 + * 99-7-27 partitioning scheme - ptb v2.2.1
97 + * 99-8-3 nbd_clr_sock bug in invalidate_device fixed? - ptb v2.2.4
98 + * 99-8-5 reverse replace of block_fsync, add sig ioctls - ptb v2.2.5
99 + * reverse bug introduced about v2.2.3 for compound reqs - ptb v2.2.5
100 + * fix clear_que bug (didn't rollback first) from 2.1.3 - ptb v2.2.5
101 + * 99-8-22 workaround strange nr_sectors bug - ptb v2.2.6
102 + * 99-8-11 fix MY_NBD_SYNC bug. Never sync'ed all - ptb v2.2.7
103 + * 99-8-12 wakeups all moved to enqueue - ptb v2.2.7
104 + * 99-8-23 remove slot->cli_age - ptb v2.2.7
105 + * 99-8-24 first 8 bytes of signature embedded in packets - ptb v2.2.8
106 + * fix SET_SIG define buglet, remove hardcoded constants - ptb v2.2.8
107 + * fix huge bug. Missing copy_fromfs in my_nbd_ack - ptb v2.2.8
108 + * removed signature embedding and all other decorations - ptb v2.2.8
109 + * 99-8-25 recast fix in my_nbd_ack to avoid align. bug - ptb v2.2.9
110 + * put in MKDEVs and put back some hardcode const fixes - ptb v2.2.10
111 + * 99-9-29 fix BLKGETSIZE bug - ptb v2.2.14
112 + * 99-10-2 run with interrupts on throughout. Think we lose some - ptb v2.2.15
113 + * 99-10-8 trim dead code, kernel 2.2 ifdef's - ptb v2.2.17
114 + * 99-12-18 further o-o - ptb v2.2.19
115 + * 99-12-28 queue account cleanup. endio on queue reqs at reset - ptb v2.2.20
116 + * interruptible semaphores for better client recovery - ptb v2.2.20
117 + * 00-1-2 debugging cleanups. Fix race in end_request - ptb v2.2.21
118 + * 00-1-4 semaphores simplified. - ptb v2.2.22
119 + * 00-6-8 emergency control by write to proc - ptb v2.2.24
120 + * 00-7-20 ported to 2.4.0-test1. Possible minor bugs found/fixed - ptb v2.2.24
121 + * 00-7-27 changed proc i/f to read_proc from get_info in 2.2/2.4 - ptb v2.2.25
122 + * 00-7-30 fixed reads before writes under 2.4 by disabling merge - ptb v2.2.25
123 + * 00-7-30 and fixed merge_reqs for 2.4, now that I understand! - ptb v2.2.25
124 + * 00-7-30 fixed/introduced possible bug in end_io for 2.2/2.4 - ptb v2.2.25
125 + * 00-7-30 added timeval/zone field in requests and replies - ptb v2.4.0
126 + * 00-7-30 fixed hitherto masked bug in read_stat in nbd_client - ptb v2.4.0
127 + * 00-7-30 added timeout to net writes in nbd_client - ptb v2.4.0
128 + * 00-8-20 display fix for devices over 2GB - ptb v2.4.5
129 + * 00-8-23 more 64 bit fixes + error out overrange requests- ptb v2.4.6/2.2.27
130 + * 00-8-31 add NBD_ERR ioctl to error out slot request- ptb v2.4.9
131 + * 00-8-31 soften NBD_SOFT_RESET so doesn't wreck protocol - ptb v2.4.9
132 + * 00-9-1 remove %L's from printfs. Kernel 2.2. doesn't - ptb v2.4.10/2.2.27
133 + * 00-9-6 add various state flags to help init order - ptb v2.4.11
134 + * 00-9-8 add checks for device initialised to set_sock - ptb v2.4.12
135 + * 00-9-17 en/disable device as aslot count goes through 0 - ptb v2.4.13/2.2.28
136 + * 00-9-21 split read/write dev req counts for accounting - ptb v2.4.14
137 + * 00-9-21 renamed sync_intvl to req_timeo - ptb v2.4.14
138 + * 00-9-21 made sync_intvl count write blocks - ptb v2.4.14
139 + * 00-9-22 repair enable after delayed disable when disabled - ptb v2.4.14
140 + * 00-9-22 include sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
141 + * 00-9-25 disable sync (nonblocking) after sync_intvl reqs - ptb v2.4.14
142 + * 00-9-25 bundle invalidate_buffers in clr_sock - ptb v2.4.14
143 + * 00-10-20 implement req_timeo per device + ioctl (Wang Gang) - ptb v2.4.15
144 + * 00-10-20 add raid mode (Wang Gang) - ptb v2.4.15
145 + * 00-10-26 throttle in do_req - ptb v2.4.15
146 + * 00-10-28 do set_sock on first open and clr_sock on last close - ptb v2.4.15
147 + * 00-11-01 make sync_intvl really sync - ptb v2.4.15
148 + * 00-11-14 rename throttle to plug, nbd_sync takes arg - ptb v2.4.17
149 + * 00-11-19 clr_sock errs req not rollback if show_errs & !aslot - ptb v2.4.17
150 + * 00-11-20 removed autodeadlock when disabled in do_req end_req - ptb v2.4.17
151 + * 00-11-21 make MY_NBD_SYNC only sync when sync_intvl > 0 - ptb v2.4.17
152 + * 00-12-24 make MY_NBD_GET_REQ use a timeout arg - ptb v2.4.18
153 + * 01-02-12 ported to 2.4.0 (works). do_nbd_request rewritten - ptb v2.4.20
154 + * 01-02-20 managed to get plugging and clustered read/writes OK - ptb v2.4.21
155 + * 01-02-21 eliminated slot->buflen for the time being - ptb v2.4.21
156 + * 01-02-27 added proper devfs support - ptb v2.4.22
157 + * 01-03-15 allowed more devices/in devfs, cleaned up endio - ptb v2.4.23
158 + * 01-03-15 added device letter (<= 3 chars) to struct- - ptb v2.4.23
159 + * 01-03-15 added request size check to do_nbd_req - ptb v2.4.23
160 + * 01-03-15 increased MAX_SECTORS to 512 by default - ptb v2.4.23
161 + * 01-03-15 made major number a module parameter - ptb v2.4.23
162 + * 01-03-18 added max_sectors array - ptb v2.4.23
163 + * 01-03-23 added devfs links - ptb v2.4.23
164 + * 01-04-17 plugging always enabled for 2.4 kernels - ptb v2.4.24
165 + * 01-04-17 made SET_RO set_device_ro as well as set local flags - ptb v2.4.25
166 + * 01-04-28 impl SET_MD5SUM ioctl and proc support for md5sum - ptb v2.4.25
167 + * 01-04-29 added accounting for md5'd reqs - ptb v2.4.25
168 + * 01-07-29 added atomic protections for accounting - ptb v2.4.25
169 + * 01-08-01 fixed 2.4 smp bugs. Interrupts off in spinlocks - ptb v2.4.25
170 + * 01-08-01 removed all semaphores for spinlocks - ptb v2.4.25
171 + * 01-08-01 invalidate_buffers in clr_sock (req'd Rogier Wolff) - ptb v2.4.25
172 + * 01-08-02 fixed smp deadlock - end_that_request_first slept! ptb v2.4.26
173 + * 01-10-16 provisionally added error in device open when notenabled ptb v2.4.27
174 + * 01-10-18 added DIRTY flag to save on repeated invalidate_buffers ptb v2.4.27
175 + * 01-10-31 increment seqno_out before delivery, so really starts at 1 v2.4.27
176 + * 01-11-01 move zeroing of seqno in cmd field to nbe_end_req* ptb v2.4.27
177 + * 01-11-18 add speed calculation, dev fields, display in proc ptb v2.4.27
178 + * 01-11-20 modifications for compiling into monolithic kernel ptb v2.4.27
179 + * 01-12-06 clr requests before reenabling, not after, in nbd_enable ptb 2.4.27
180 + * 02-02-21 make nbd_rollback modal, absorbing nbd_error ptb 2.4.27
181 + * 02-08-08 added local BLKSSZGET (reject) and related ioctls ptb 2.4.30
182 + * 02-08-12 make nbd_ack not ruin req when its rolled back already ptb 2.4.30
183 + * 02-09-18 fix __FUNCTION__ for new gcc ptb 2.4.30
184 + * 02-09-18 always allow daemon death even with reqs waiting ptb 2.4.30
185 + * 02-09-18 eliminate SYNC_REQD, RLSE_REQD ptb 2.4.30
186 + * 02-09-18 eliminate speed_lim ptb 2.4.30
187 + * 02-09-18 fix countq accounting ptb 2.4.30
188 + * 02-09-18 encapsulate remote ioctl handling ptb 2.4.30
189 + * 02-09-18 remote ioctl uses kernel req, not our fake one ptb 2.4.30
190 + * 02-09-18 eliminated ctldta use (too much tricky logic) ptb 2.4.30
191 + * 02-09-28 handle req specials ptb 2.4.30
192 + * 02-10-10 introduce DIRECT flag ptb 2.4.30
193 + * 02-10-13 rollback pushes reqs to local queue, not queues them! ptb 2.4.30
194 + * 02-10-13 add hooks for separate ioctl module ptb 2.4.30
195 + * 02-10-16 take set_sock out of open. Put pid check in handshake ptb 2.4.30
196 + * 02-10-16 define MY_NBD_GET_NPORT ioctl ptb 2.4.30
197 + * 02-10-18 remove wait from MY_NBD_SYNC ioctl ptb 2.4.30
198 + * 02-10-20 rollback adds requests to queue in seqno order ptb 2.4.30
199 + * 02-10-23 introduce and use pid_sem instead of req_sem ptb 2.4.30
200 + * 02-10-30 support client fallback to ioctls on whole disk ptb 2.4.30
201 + * 02-11-3 moved set INITIALISED up to coincide with setting inode ptb 2.4.30
202 + * 02-11-3 add media check and revalidate routines ptb 2.4.30
203 + * 02-11-4 encapsulate lives++ and ENABLED changes into nbd_enable ptb 2.4.30
204 + * 02-11-4 set_enable from proc only enables, not clears queue ptb 2.4.30
205 + * 02-11-4 take blk_put_request out of end_request (it locks!) ptb 2.4.30
206 + * 02-11-4 replace list_del by list_del_init ptb 2.4.30
207 + * 02-12-7 nbd_release made aware of daemons on whole disk ptb 2.4.30
208 + * 03-01-7 added ioctls for setfaulty etc. ptb 2.4.31
209 + * 03-02-1 used metalock for non-queue changes ptb 2.4.31
210 + * 03-03-12 add md_list notification ioctls ptb 2.4.31
213 +#include <linux/major.h>
214 +#ifndef UNIX98_PTY_MAJOR_COUNT
215 + #define UNIX98_PTY_MAJOR_COUNT 8
216 + #ifndef UNIX98_NR_MAJORS
217 + #define UNIX98_NR_MAJORS UNIX98_PTY_MAJOR_COUNT
221 +#include <linux/module.h>
223 +#if defined(__GNUC__) && __GNUC__ >= 2
224 +#define _LOOSE_KERNEL_NAMES
227 +#include <linux/version.h>
229 +#include <linux/fs.h>
230 +#include <linux/stat.h>
231 +#include <linux/errno.h>
232 +#include <asm/segment.h>
234 +#include <asm/uaccess.h> /* PTB - when did this arrive in kernel? */
235 +#include <asm/byteorder.h>
236 +#include <linux/wrapper.h>
238 +#define MAJOR_NR NBD_MAJOR
239 +static int major = MAJOR_NR;
241 +#include <linux/proc_fs.h>
242 +#include <linux/genhd.h>
243 +#include <linux/hdreg.h>
245 +#include <linux/file.h> /* PTB - when did this arrive in kernel? */
247 +#include <linux/smp_lock.h>
249 +#include <linux/devfs_fs_kernel.h>
251 +#include <linux/sysctl.h>
252 +#include <linux/init.h>
253 +#include <linux/kdev_t.h>
254 +#include <linux/buffer_head.h>
255 +#include <linux/completion.h>
258 + * PTB --------------- compatibility ------------------- *
259 + * layer starts here. *
263 + * PTB BH_Protected disappeared somewhere around 2.4.10 but this is
264 + * still needed for the very rare write local/read remote mode. Don't
265 + * worry about it in normal operation!
267 + #define mark_buffer_protected(rbh) \
269 + mark_buffer_dirty (rbh); \
270 + mark_buffer_uptodate (rbh, 1); \
271 + refile_buffer (rbh); \
274 + /* PTB list interface extensions */
275 + #define list_head(ptr, type, member) \
276 + (list_empty(ptr)?NULL:list_entry(((struct list_head *)ptr)->next,type,member))
277 + #define list_tail(ptr, type, member) \
278 + (list_empty(ptr)?NULL:list_entry(((struct list_head *)ptr)->prev,type,member))
280 + /* PTB for arches without the atomic mask ops (and no smp, I think!)
281 + * - feel free to correct with assembler
283 + #ifndef atomic_set_mask
284 + #define atomic_set_mask(mask, x) (x)->counter |= (mask)
286 + #ifndef atomic_clear_mask
287 + #define atomic_clear_mask(mask, x) (x)->counter &= ~(mask)
291 + * PTB --------------- compatibility ------------------- *
292 + * layer ENDS here. *
295 +int linux_version_code = LINUX_VERSION_CODE;
297 +#include <linux/bio.h>
298 +#include <linux/enbd.h>
299 +#include <linux/enbd_ioctl.h>
302 + * PTB kernel data - 4KB worth
303 + * We need space for nda, nda1, .. nda15, ndb, ndb1, ..
304 + * The index is exactly the minor number.
306 + static int nbd_blksizes[MAX_NBD * NBD_MAXCONN];
307 + static int nbd_sizes[MAX_NBD * NBD_MAXCONN];
308 + static __u64 nbd_bytesizes[MAX_NBD * NBD_MAXCONN];
309 + static int nbd_max_sectors[MAX_NBD * NBD_MAXCONN];
312 + * PTB our data - about 3KB
313 + * These are nda, ndb, ndc, ...
314 + * Divide the minor by NBD_MAXCONN to get this index.
316 + static struct nbd_device nbd_dev[MAX_NBD];
317 + static spinlock_t nbd_lock = SPIN_LOCK_UNLOCKED;
318 + static struct nbd_md nbd_md;
319 + static struct nbd_ioctl_stub nbd_remote_ioctl;
321 + struct nbd_device * nbd_get(int i) {
322 + return &nbd_dev[i];
325 + #define NBD_FAIL( s ) { \
326 + NBD_DEBUG(1, s " (result %d).\n" , result ); \
329 + #define NBD_HARDFAIL( s ) { \
330 + NBD_ERROR( s " (result %d).\n" , result ); \
331 + lo->harderror = result; \
332 + goto hard_error_out; \
336 + * PTB device parameters. These are module parameters too.
339 + static int rahead = NBD_RAHEAD_DFLT;/* PTB - read ahead blocks */
340 + static int sync_intvl = NBD_SYNC_INTVL; /* PTB - sync every n secs/Kreqs */
341 + static int merge_requests /* PTB - bool, do request coalesce */
342 + = NBD_MERGE_REQ_DFLT;
343 + static int buf_sectors = NBD_MAX_SECTORS;
344 + /* PTB - user bufsize required */
345 + static int show_errs = 1; /* PTB - RAID mode? not usually */
346 + static int direct = 0; /* PTB - all opens are O_DIRECT */
347 + static int plug = NBD_PLUG_DFLT;
349 + static int md5sum = 0; /* PTB - use md5summing write proto */
350 + static int md5_on_threshold = 1000; /* PTB - reqs reqd to turn md5 on */
351 + static int md5_off_threshold = 10; /* PTB - errs reqd to turn md5 off */
353 +#ifndef NO_BUFFERED_WRITES
354 + static int buffer_writes = 0; /* PTB - act like ramd on write */
355 +#endif /* NO_BUFFERED_WRITES */
358 + MODULE_PARM (rahead, "i");
359 + MODULE_PARM (sync_intvl, "i");
360 + MODULE_PARM (merge_requests, "i");
361 + MODULE_PARM (buf_sectors, "i");
362 + MODULE_PARM (show_errs, "i");
363 + MODULE_PARM (direct,"i");
364 + #ifndef NO_BUFFERED_WRITES
365 + MODULE_PARM (buffer_writes, "i");
366 + #endif /* NO_BUFFERED_WRITES */
367 + MODULE_PARM (major, "i");
368 + MODULE_PARM (md5sum, "i");
369 + MODULE_PARM (md5_on_threshold, "i");
370 + MODULE_PARM (md5_off_threshold, "i");
373 + // PTB This pointer is initialised in nbd_init.
374 + static struct request_queue * nbd_queue;
376 +#define NO_BUFFERED_WRITES 1
379 + * PTB --------------- functions ----------------------- *
384 + * Decode the request type of a request and return it. Don't we
385 + * have anywhere else to put this? Yes, in private data. But
386 + * that's just a pointer to our device data so we don't use it.
388 + * we use the low bit (REQ_RW) of the flags and the first high bit
389 + * (REQ_NBD) to designate the type of request.
391 + * @req the request to get the type of.
395 +rq_type (struct request *req)
397 + if (req->flags & REQ_SPECIAL)
400 + switch ( ((req->flags & REQ_RW) ?1:0)
401 + | ((req->flags & REQ_NBD)?2:0)
412 + // PTB report what we can of the strangeness if it is strange
413 + return (req->flags < 4) ? -1: req->flags;
417 + * PTB code the request type into a request.
419 + * This appears to be only used when making an ioctl request and it
420 + * never really escapes from our private area and it doesn't matter too
421 + * much how efficient it is either.
423 + * This function marks a request for conventional viewing as
424 + * being of the designated conceptual type. It corresponds to the old
425 + * "type" field in requests.
427 + * @req the request to set the type on
428 + * @type one of READ, WRITE, etc.
431 +set_rq_type (struct request *req, int type)
435 + req->flags &= ~(REQ_RW | REQ_NBD | REQ_SPECIAL);
438 + req->flags &= ~(REQ_NBD | REQ_SPECIAL);
439 + req->flags |= REQ_RW;
442 + req->flags &= ~(REQ_RW | REQ_SPECIAL);
443 + req->flags |= REQ_NBD;
446 + req->flags &= ~REQ_SPECIAL;
447 + req->flags |= REQ_RW | REQ_NBD;
450 + req->flags |= REQ_RW | REQ_NBD | REQ_SPECIAL;
456 + * PTB count number of blocks in a request. This will be an overestimate
457 + * if the number is not an exact multiple. It seems to happen. We
458 + * guarantee to return -ve only if the request is invalid.
460 + * @req - request we want to count
463 +nr_blks (struct request *req)
465 + unsigned log_sectors_per_blk;
466 + unsigned sectors_per_blk;
469 + struct nbd_device *lo;
474 + if (rq_type(req) == REQ_SPECIAL) // PTB contains no data
477 + lo = req->rq_disk->private_data;
479 + log_sectors_per_blk = lo->logblksize - 9;
480 + sectors_per_blk = 1 << log_sectors_per_blk;
482 + sectors = req->nr_sectors;
483 + size = (sectors + sectors_per_blk - 1) >> log_sectors_per_blk;
489 + * return a temporary buffer containing the (1 or 2 char) device letter.
490 + * This works for i up to 26*26. 0 is "a". The buffer is zero
493 + * @i number to be translated to x[y] alphabetical form.
496 +device_letter (int i)
499 + static char buf[3];
500 + static int cached_i = -1;
513 + buf[0] = 'a' + i / 26;
514 + buf[1] = 'a' + i % 26;
520 + * PTB auxiliary functions for manipulating the sequence number. Isn't
521 + * there anything private we can use in a request?
523 + * This function returns the sequno
525 + * @req the request to get the sequence number of
528 +rq_seqno (struct request *req)
530 + return req->flags >> __REQ_NBDSEQNO;
533 +rq_set_seqno (struct request *req, int val)
535 + // PTB preserve first __REQ_NR_BITS bits
536 + req->flags &= REQ_NBDSEQNO - 1;
537 + // PTB shift by one more than strictly necessary (see rq_seqno)
538 + req->flags |= val << __REQ_NBDSEQNO;
542 + * PTB sync the device. Modes:
543 + * @arg = 1: Do it sync
544 + * @arg = 0: Do it async
546 + * We can't call sync_dev outside a process context. I don't know why.
547 + * Death results from a scheduled attempt.
549 + * Call without the semaphore held, as we lock it and call sync_dev.
552 +nbd_sync (struct nbd_device *lo, long arg)
554 + struct inode *inode = lo->inode;
555 + short minor, nbd, islot;
557 + islot = atomic_read (&lo->islot);
559 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED) || !inode) {
563 + minor = minor (inode->i_rdev);
564 + nbd = minor >> NBD_SHIFT;
566 + // PTB sync_dev is async. fsync_dev is sync.
569 + // PTB 2.5.7 does not have async sync! FIXME
572 + fsync_bdev (inode->i_bdev);
573 + invalidate_buffers (mk_kdev (major, nbd << NBD_SHIFT));
583 +nbd_async_sync (struct nbd_device *lo)
588 +nbd_sync_sync (struct nbd_device *lo)
594 + * Do sync async if we're enabled, sync if we're not.
596 + * @lo the device to maybe sync (sync or async sync!)
599 +nbd_maybe_sync_sync (struct nbd_device *lo)
602 + if ((atomic_read (&lo->flags) & NBD_ENABLED)
603 + && !(atomic_read (&lo->flags) & NBD_REMOTE_INVALID)) {
604 + nbd_async_sync (lo);
607 + nbd_sync_sync (lo);
614 + * PTB - put a request onto the head of a nbd device's queue
615 + * - presumably having taken it off the kernel's queue first!
616 + * - We take the queue spinlock.
618 + * @lo = the device we are on (could we get it from the req?)
619 + * @req = the request we shift
620 + * @irqsave = save and restore irqmask when taking our queue spinlock
623 +nbd_enqueue (struct nbd_device *lo, struct request *req)
625 + unsigned long req_blks = nr_blks (req);
627 + if (req_blks < 0) {
628 + short islot = atomic_read (&lo->islot);
629 + NBD_ERROR ("(%d): invalid req %p. Not touching!\n", islot, req);
633 + /* PTB accounting and nothing more - first, specials */
634 + if (! (req->flags & REQ_SPECIAL)) {
635 + // PTB the special req counting semantics relies on
636 + // countq not including itself in the count!
639 + cmd = rq_data_dir (req);
640 + atomic_add (req_blks, &lo->requests_in[cmd]);
642 + // PTB do we need locks here? Apparently not.
643 + atomic_inc (&lo->countq[cmd]);
644 + countq = atomic_read (&lo->countq[cmd]);
646 + // PTB the maxes are just noncritical stats
647 + if (atomic_read (&lo->maxq[cmd]) < countq)
648 + atomic_set (&lo->maxq[cmd], countq);
649 + atomic_inc (&lo->req_in[cmd][req_blks]);
650 + // PTB the maxes are just noncritical stats
651 + if (atomic_read (&lo->maxreqblks) < req_blks)
652 + atomic_set (&lo->maxreqblks, req_blks);
655 + write_lock (&lo->queue_lock);
657 + list_add (&req->queuelist, &lo->queue);
659 + write_unlock (&lo->queue_lock);
661 + wake_up_interruptible (&lo->wq);
666 + * PTB - remove a request from anywhere in the nbd device general queue
667 + * - return 0 for success, -ve for fail
669 + * We need to hold the queue lock when calling this routine.
670 + * It walks the queue.
672 + * @lo the nbd device
673 + * @req the request to be removed
676 +nbd_remove (struct nbd_device *lo, struct request *req)
683 + list_del_init (&req->queuelist);
685 + /* PTB accounting and nothing more */
686 + cmd = rq_data_dir (req);
687 + atomic_dec (&lo->countq[cmd]);
692 + * PTB - Open the device. This is the blkops function.
695 +nbd_open (struct inode *inode, struct file *file)
698 + struct nbd_device *lo;
704 + if (!inode && file) { /* added by ptb for 2.0.35. Necessary? */
705 + inode = file->f_dentry->d_inode;
708 + NBD_ERROR ("null inode.\n");
712 + dev = minor (inode->i_rdev);
713 + nbd = dev >> NBD_SHIFT;
714 + part = dev - (nbd << NBD_SHIFT);
717 + if (nbd >= MAX_NBD) {
718 + NBD_ERROR ("too many (%d) whole devices open\n", nbd);
722 + lo = &nbd_dev[nbd];
723 + devnam = lo->devnam;
725 + /* PTB provision for opening for direct i/o - gives mount aid */
727 + && (atomic_read(&lo->flags) & NBD_DIRECT)
728 + && !(file->f_flags & O_DIRECT)) {
729 + /* PTB we set NOFOLLOW to show we did it ! */
730 + file->f_flags |= O_DIRECT | O_NOFOLLOW;
734 + /* PTB we have got the whole dev's file or inode for 1st time */
735 + if (!lo->file || lo->file != file) {
737 + atomic_set (&(&lo->wspeed)->frstj, jiffies);
738 + atomic_set (&(&lo->rspeed)->frstj, jiffies);
739 + atomic_set (&(&lo->tspeed)->frstj, jiffies);
741 + if (!lo->inode || lo->inode != inode) {
744 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {
745 + atomic_set_mask (NBD_INITIALISED, &lo->flags);
749 + atomic_inc (&lo->refcnt);
751 + if (!(atomic_read (&lo->flags) & NBD_VALIDATED)
753 + && (atomic_read (&lo->flags) & NBD_ENABLED)) {
754 + NBD_INFO ("partition check on device nd%s\n", lo->devnam);
755 + check_disk_change(inode->i_bdev);
758 + * PTB do we set VALIDATED here, or let the kernel call
759 + * sequence result in it happening via our removable
760 + * device routines? Let's go for the latter option.
768 + * PTB - complete a transaction irrefutably by taking it out of the
769 + * - slot pending position it is in, and reporting end_request to kernel
771 + * We are called without locks because our call to end request
772 + * will take some sort of lock momentarily and we don't need
773 + * locks because our request should already be off all queues.
775 + * @slot the nbd_slot on which the req notionally was
776 + * @req the poor defenceless kernel request about to be acked
779 +nbd_commit (struct nbd_slot *slot, struct request *req)
782 + struct nbd_device *lo = slot->lo;
783 + unsigned long req_blks = nr_blks (req);
786 + if (req_blks < 0) {
787 + NBD_ERROR ("corrupted req %p. Not touching with bargepole.\n",
792 + list_del_init (&req->queuelist);
794 + nbd_end_request_lock (req);
795 + blk_put_request (req);
798 + slot->req -= req_blks;
800 + /* PTB accounting and nothing more */
801 + cmd = rq_data_dir (req);
803 + atomic_sub (req_blks, &lo->requests_req[cmd]);
804 + if (req->errors != 0) {
805 + /* PTB error exit */
806 + atomic_add (req_blks, &lo->requests_err);
807 + slot->err += req_blks;
811 + atomic_add (req_blks, &lo->requests_out[cmd]);
812 + slot->out += req_blks;
815 + /* PTB everything but a write was easy */
819 + * PTB now non error case writes
821 + * account the 4 cases for a md5sum'd transaction
824 + switch (slot->flags & (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK)) {
826 + case NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK:
827 + atomic_add (req_blks, &lo->wrequests_5to); // 11
828 + atomic_add (req_blks, &lo->wrequests_5so);
829 + // PTB zero the countdown to turning off md5 as it works
830 + atomic_set (&lo->wrequests_5co, 0);
833 + case NBD_SLOT_MD5SUM:
834 + atomic_add (req_blks, &lo->wrequests_5to); // 10
835 + atomic_add (req_blks, &lo->wrequests_5wo);
836 + atomic_inc (&lo->wrequests_5co);
837 + if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
838 + atomic_set (&lo->wrequests_5co, 0);
839 + // PTB turn off md5summing as it is not successful
840 + atomic_clear_mask (NBD_MD5SUM, &lo->flags);
844 + case NBD_SLOT_MD5_OK:
845 + atomic_add (req_blks, &lo->wrequests_5to); // 01
846 + atomic_add (req_blks, &lo->wrequests_5eo);
847 + atomic_inc (&lo->wrequests_5co);
848 + if (atomic_read (&lo->wrequests_5co) > md5_off_threshold) {
849 + atomic_set (&lo->wrequests_5co, 0);
850 + // PTB turn off md5summing as it is errored
851 + atomic_clear_mask (NBD_MD5SUM, &lo->flags);
857 + // PTB nobody asked for a md5 and nobody gave one back
858 + atomic_inc (&lo->wrequests_5no);
859 + if (atomic_read (&lo->wrequests_5no) > md5_on_threshold) {
860 + atomic_set (&lo->wrequests_5no, 0);
861 + // PTB turn on md5summing every so often
862 + atomic_set_mask (NBD_MD5SUM, &lo->flags);
867 + // PTB clear the md5sum indicators from the slot afterwards!
868 + slot->flags &= ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);
870 + // PTB we ran out of difficult cases, so return
874 + * PTB - error out a transaction irrefutably by taking it out of the
875 + * - slot pending position it is in, and reporting end_request to kernel
877 + * We must be called without spinlocks held, as we take it in end req
879 + * @slot the nbd_slot on which the req notionally was
880 + * @req the poor defenceless kernel request about to be errored
883 +nbd_error (struct nbd_slot *slot, struct request *req)
885 + struct nbd_device *lo = slot->lo;
886 + unsigned long req_blks = nr_blks (req);
889 + if (req_blks < 0) {
890 + NBD_ERROR ("passed illegal request %p\n", req);
896 + * PTB We don't need the queue spinlock since we don't touch our queue,
897 + * and we're the only ones working on this slot.
899 + list_del_init (&req->queuelist);
901 + NBD_ALERT ("error out req %p from slot %d!\n", req, slot->i);
903 + nbd_end_request_lock (req);
904 + blk_put_request (req);
906 + /* PTB accounting and nothing more */
907 + cmd = rq_data_dir (req);
908 + atomic_sub (req_blks, &lo->requests_req[cmd]);
910 + slot->in -= req_blks;
911 + slot->req -= req_blks;
914 + slot->err += req_blks;
915 + atomic_add (req_blks, &lo->requests_err);
919 + * Take a request out of a slot. This must not hold the queuelock on
920 + * entry as we take the queue lock in order to play with the devices
923 + * @slot the nbd slot on which to work
927 +nbd_rollback (struct nbd_slot *slot, struct request *req)
930 + struct nbd_device *lo = slot->lo;
931 + unsigned long req_blks, flags;
933 + struct list_head *pos;
934 + struct request *xreq;
936 + if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
937 + nbd_error (slot, req);
941 + req_blks = nr_blks (req);
943 + if (req_blks < 0) {
944 + NBD_ERROR ("passed illegal request %p\n", req);
948 + list_del_init (&req->queuelist);
950 + NBD_ALERT ("rollback req %p from slot %d!\n", req, slot->i);
952 + if (! (req->flags & REQ_SPECIAL)) {
953 + /* PTB accounting */
954 + slot->in -= req_blks;
955 + slot->req -= req_blks;
958 + seqno = rq_seqno(req);
960 + write_lock_irqsave(&lo->queue_lock, flags);
961 + list_for_each_prev (pos, &lo->queue) {
962 + xreq = list_entry (pos, struct request, queuelist);
963 + if (rq_seqno(xreq) > seqno) {
967 + list_add_tail (&req->queuelist, pos);
968 + write_unlock_irqrestore(&lo->queue_lock, flags);
973 + * PTB - undo transactions by taking them out of the slot pending
974 + * - position and replacing them on the generic device queue
975 + * - NB we do not hold the io request lock or queue sem when
976 + * - calling this as we take it internall in nbd_rollback
978 + * @slot the nbd slot to scan
981 +nbd_rollback_all (struct nbd_slot *slot)
984 + struct request *req;
987 + while (!list_empty (&slot->queue)) {
989 + if (count++ > 1000)
992 + req = list_head (&slot->queue, struct request, queuelist);
997 + nbd_rollback (slot, req);
1003 + * PTB error out all the requests on a slot
1005 + * We must be called without the io spinlock held, as we take it in
1008 + * @slot the nbd slot to scan
1011 +nbd_error_all (struct nbd_slot *slot)
1014 + struct request *req;
1017 + while (!list_empty (&slot->queue)) {
1018 + if (count++ > 1000)
1020 + req = list_head (&slot->queue, struct request, queuelist);
1023 + nbd_error (slot, req);
1028 + * PTB - let a request onto the slot pending position
1029 + * - Can be called without the spinlock and doesn't take the
1030 + * spinlock as we only deal with our unique slot. If there
1031 + * were more than one client per slot this would be a problem
1032 + * but there aren't so it isn't.
1034 + * @slot the nbd slot to let the request onto
1035 + * @req the request to move onto the slot queue
1038 +nbd_accept (struct nbd_slot *slot, struct request *req)
1041 + struct nbd_device *lo = slot->lo;
1042 + unsigned long req_blks = nr_blks (req);
1048 + /* PTB accounting and nothing more */
1049 + cmd = rq_data_dir (req);
1051 + atomic_add (req_blks, &lo->requests_req[cmd]);
1052 + /* PTB - Note that this really is slot and not lo.
1054 + list_add (&req->queuelist, &slot->queue);
1056 + slot->req_age = jiffies;
1057 + slot->in += req_blks;
1058 + slot->req += req_blks;
1062 + * PTB - read from userspace to a request buffer. Do it piecewise
1063 + * - to cope with clustered requests.
1064 + * - return number of bytes read
1066 + * Unfortunately the only way we can return less than the right
1067 + * number of bytes is when the receiving req does not have the
1068 + * right number of buffers, because the copy_from_user itself
1069 + * doesn't tell us.
1072 +copy_from_user_to_req (struct request *req, char *user, int len)
1075 + unsigned size = 0;
1076 + struct bio *bio /* = req->bio */;
1078 + /* PTB assume user verified */
1080 + rq_for_each_bio(bio, req) {
1083 + struct bio_vec * bvl;
1085 + bio_for_each_segment(bvl, bio, i) {
1087 + struct page *page = bvl->bv_page;
1088 + int offset = bvl->bv_offset;
1089 + const unsigned current_size
1092 + buffer = page_address(page) + offset;
1094 + copy_from_user (buffer, user + size, current_size);
1096 + size += current_size;
1099 + if (size != len) {
1100 + NBD_ALERT ("requested %d and only read %d bytes to req %p\n",
1102 + NBD_ALERT ("request %p wanted to read user space buffer %p\n",
1109 + * PTB - andres' kernel half of the user-space network handshake, used
1110 + * - to complete a transaction.
1111 + * - return 0 for success and -ve for fail.
1113 + * @slot the nbd slot being acted on
1117 +nbd_ack (struct nbd_slot *slot)
1119 + struct nbd_reply reply;
1120 + struct request *req, *xreq;
1124 + unsigned long req_blks = 1;
1125 + struct nbd_device *lo = slot->lo;
1126 + unsigned buflen = 0;
1129 + struct list_head *pos;
1132 + if (!(slot->flags & NBD_SLOT_BUFFERED)) {
1136 + atomic_inc (&lo->cthreads);
1137 + slot->flags |= NBD_SLOT_RUNNING;
1138 + slot->cli_age = jiffies;
1140 + user = slot->buffer;
1141 + copy_from_user ((char *) &reply, (char *) user,
1142 + sizeof (struct nbd_reply));
1144 + // PTB we keep tracking the write position in the input buffer
1145 + buflen += NBD_BUFFER_DATA_OFFSET;
1147 + // PTB save the reply handle (which is an address) as our req
1148 + memcpy (&req, &reply.handle, sizeof (req));
1151 + list_for_each (pos, &slot->queue) {
1152 + xreq = list_entry (pos, struct request, queuelist);
1153 + if (count++ > 1000)
1156 + /* PTB found it */
1160 + if (xreq != req) {
1162 + if (slot->nerrs++ < 3)
1163 + NBD_ALERT ("fatal: Bad handle %p != %p!\n",
1166 + atomic_dec (&lo->cthreads);
1167 + slot->flags &= ~NBD_SLOT_RUNNING;
1169 + NBD_ALERT("ignoring ack of req %p which slot does not have\n",
1173 + * PTB we lie and say success because userspace got through to
1174 + * us OK and the req they missed has been rolled back and will
1175 + * be retransmitted by the kernel later and elsewhere
1180 + if (reply.magic != NBD_REPLY_MAGIC) {
1182 + if (slot->nerrs++ < 3)
1183 + NBD_ALERT ("Not enough reply magic in %s\n",
1186 + * PTB returning -EAGAIN causes the client to pause 0.5s
1187 + * and throw its reply away, then return to service. We leave
1188 + * any request we have to age and be rolled back.
1193 + if (reply.error > 0 || req->errors > 0) {
1194 + /* PTB wasn't error++'ed before */
1196 + if (slot->nerrs++ < 3)
1197 + NBD_ALERT ("exited with reply error\n");
1198 + /* PTB we handle this - it's a remote error */
1199 + NBD_FAIL ("remote error on request\n");
1202 + req_blks = nr_blks (req);
1204 + reqlen = req->nr_sectors;
1207 + cmd = rq_type (req);
1211 + unsigned long rcmd;
1217 + // PTB We have to copy the buffer bit by bit in
1218 + // case the request is clustered.
1221 + copy_from_user_to_req (req, ((char *) user) + buflen, reqlen);
1222 + if (size < reqlen) {
1224 + ("(%d): copy %dB from user to req %p failed (%d)\n",
1225 + slot->i, reqlen, req, size);
1226 + // PTB we could try again? We should investigate.
1228 + ("exited because of bad copy from user\n");
1229 + // PTB FIXME - think we want to discard and retry
1232 + // PTB we keep tracking the write position in the buffer
1238 + * PTB we want to know if the reply is md5summed, and if it is
1239 + * whether the md5sum is the same as the one on the
1240 + * request. But that's not something we can presently see
1241 + * from here as we don't make an md5sum in the kernel.
1242 + * So we have to rely on the reply flag from userspace.
1243 + * We transmit the information to the slot, as we can't
1244 + * keep it on the request.
1247 + switch (reply.flags &
1248 + (NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK)) {
1250 + case NBD_REPLY_MD5SUM | NBD_REPLY_MD5_OK:
1252 + * PTB we asked for an md5sum comparison and
1253 + * the two matched, so we skipped writing the request
1255 + slot->flags |= (NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK); //11
1257 + case NBD_REPLY_MD5SUM:
1258 + // PTB the two differed, so we wrote the request
1259 + slot->flags |= NBD_SLOT_MD5SUM;
1260 + slot->flags &= ~NBD_SLOT_MD5_OK; // 10
1262 + case NBD_REPLY_MD5_OK:
1263 + // PTB the server refused the md5 request
1264 + slot->flags &= ~NBD_SLOT_MD5SUM;
1265 + slot->flags |= NBD_SLOT_MD5_OK; // 01
1269 + // PTB nobody asked for an md5sum comparison
1270 + slot->flags &= ~(NBD_SLOT_MD5SUM | NBD_SLOT_MD5_OK);//00
1273 + // PTB now we are all set up to do the accounting in commit etc.
1277 + // PTB FIXME. Just temporary.
1278 + NBD_ALERT ("special req %p on slot %d\n", req, slot->i);
1286 + if (!(reply.flags & NBD_REPLY_IOCTL))
1287 + NBD_ALERT ("ioctl reply to req %p has no ioctl flag\n",
1290 + // PTB the commit should emit the request notification
1292 + rcmd = (long) req->special;
1293 + arg = req->buffer;
1297 + NBD_FAIL ("unauthorized remote ioctl\n");
1300 + if (!(_IOC_DIR (cmd) & _IOC_READ)) {
1305 + * PTB We saved ioctl size in req .. but only approximately,
1310 + * PTB if we are reading, it should be to the local
1311 + * buffer arg, which points at lo->ctldata or other buffer
1314 + // PTB we are treating a saved local address or direct val
1315 + if (req->nr_sectors > 0) {
1317 + * PTB sectors is an overestimate. Should be
1318 + * OK as we are reading from the client
1319 + * buffer which has plenty of room to spare.
1321 + int size = req->nr_sectors << 9;
1322 + copy_from_user (arg, (char *) user + buflen, size);
1334 + * PTB - completion (or erroring) of transaction.
1335 + * note that nbd_commit will take a lock to do end_req
1337 + nbd_commit (slot, req);
1338 + atomic_dec (&lo->cthreads);
1339 + slot->flags &= ~NBD_SLOT_RUNNING;
1343 + /* PTB we will next do a client rollback on the slot from userspace.
1344 + * Right here we just skip the request.
1345 + * But .. don't error the request. We might have rolled it
1346 + * back and be referencing it.
1348 + if (result != -EAGAIN && result != 0) {
1349 + req->errors += req_blks;
1350 + slot->err += req_blks;
1352 + result = result < 0 ? result : -ENODEV;
1353 + // PTB one client thread leaves
1354 + atomic_dec (&lo->cthreads);
1355 + slot->flags &= ~NBD_SLOT_RUNNING;
1360 + * PTB - write to userspace from a request buffer. Do it piecewise
1361 + * - to cope with clustered requests.
1362 + * - return number of bytes written
1365 +copy_to_user_from_req (struct request *req, char *user, int len)
1368 + unsigned size = 0;
1369 + struct bio *bio /* = req->bio */;
1371 + /* PTB assume user verified */
1373 + rq_for_each_bio(bio, req) {
1376 + struct bio_vec * bvl;
1378 + bio_for_each_segment(bvl, bio, i) {
1380 + struct page *page = bvl->bv_page;
1381 + int offset = bvl->bv_offset;
1382 + const unsigned current_size
1385 + buffer = page_address(page) + offset;
1387 + copy_to_user (user + size, buffer, current_size);
1389 + size += current_size;
1397 + * PTB do the devices three speed updates
1399 + * @lo the nbd device to do the update on
1402 +nbd_set_speed (struct nbd_device *lo)
1405 + struct nbd_speed *wspd = &lo->wspeed;
1406 + struct nbd_speed *rspd = &lo->rspeed;
1407 + struct nbd_speed *tspd = &lo->tspeed;
1408 + w = atomic_read (&lo->requests_in[WRITE]);
1409 + wspd->update (wspd, w);
1410 + r = atomic_read (&lo->requests_in[READ]);
1411 + rspd->update (rspd, r);
1413 + tspd->update (tspd, t);
1419 + * PTB - andres' kernel half of the userspace networking. This part
1420 + * - initiates the transaction by taking a request off the generic
1421 + * - device queue and placing it in the slots pending position.
1422 + * - I believe we return 0 for success and -ve for fail.
1423 + * - timeo is the number of jiffies we are prepared to wait
1425 + * @slot the nbd slot to act on.
1428 +nbd_get_req (struct nbd_slot *slot)
1430 + struct nbd_request request;
1431 + struct request *req;
1433 + static atomic_t count;
1434 + unsigned start_time = jiffies;
1435 + struct nbd_device *lo = slot->lo;
1436 + unsigned timeout = lo->req_timeo * HZ;
1437 + int islot = slot->i;
1438 + // PTB for the new timezone field in requests
1439 + extern struct timezone sys_tz;
1440 + struct timeval time;
1441 + unsigned long flags;
1442 + struct nbd_seqno * seqno_out = &lo->seqno_out;
1444 + atomic_inc (&lo->cthreads); // PTB - client thread enters
1445 + slot->flags |= NBD_SLOT_RUNNING;
1446 + slot->cli_age = jiffies;
1448 + if (!(slot->flags & NBD_SLOT_BUFFERED)) {
1449 + NBD_FAIL ("Our slot has no buffer");
1452 + atomic_set (&lo->islot, islot);
1454 + if (!list_empty (&slot->queue)) {
1455 + NBD_FAIL ("impossible! already treating one request");
1456 + // PTB we do a nontrivial rollback from the user daemon
1458 + if (!slot->file) {
1460 + NBD_FAIL ("Our slot has been nofiled");
1462 + if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
1464 + NBD_FAIL ("Our slot has been vamooshed");
1467 + atomic_inc (&lo->cwaiters);
1468 + slot->flags |= NBD_SLOT_WAITING;
1470 + // PTB take spinlock in order to examine queue
1471 + // we need to protect ourselves against the request fn too
1472 + read_lock_irqsave (&lo->queue_lock, flags);
1473 + atomic_dec (&lo->cwaiters);
1474 + slot->flags &= ~NBD_SLOT_WAITING;
1476 + // PTB - now spin until request arrives to treat
1477 + while (slot->file && list_empty (&lo->queue)) {
1479 + static int nbd_clr_sock (struct nbd_slot *slot); // forward decl
1481 + int time_left = start_time + timeout - jiffies;
1483 + read_unlock_irqrestore (&lo->queue_lock, flags);
1485 + // PTB one client thread goes to sleep
1486 + atomic_inc (&lo->cwaiters);
1487 + slot->flags |= NBD_SLOT_WAITING;
1489 + interruptible_sleep_on_timeout (&lo->wq, time_left);
1491 + slot->flags &= ~NBD_SLOT_WAITING;
1492 + // PTB one client thread reactivates
1493 + atomic_dec (&lo->cwaiters);
1494 + atomic_inc (&count);
1496 + // PTB Have to take the spinlock again to check at the queue
1497 + atomic_inc (&lo->cwaiters);
1498 + slot->flags |= NBD_SLOT_WAITING;
1499 + // we need to protect ourselves against the request fn too
1500 + read_lock_irqsave (&lo->queue_lock, flags);
1501 + atomic_dec (&lo->cwaiters);
1502 + slot->flags &= ~NBD_SLOT_WAITING;
1504 + // PTB fail for recheck if we are inactive too long
1506 + time_left = start_time + timeout - jiffies;
1507 + if (time_left > 0 || !list_empty (&lo->queue))
1510 + // PTB bad. timeout with nothing on queue. Error out.
1513 + // PTB we will exit with fail, so up spinlock now
1514 + read_unlock_irqrestore (&lo->queue_lock, flags);
1516 + siz = lo->blksize + sizeof (struct nbd_request);
1517 + // PTB verify the buffer is still OK - holds one block
1518 + if (access_ok(VERIFY_WRITE,slot->buffer,siz))
1521 + // PTB buffer is invalid
1524 + // PTB clr_sock takes both the io lock and the spinlock
1525 + nbd_clr_sock (slot);
1526 + NBD_FAIL ("Our process has died or lost its buffer");
1529 + * PTB we may do a rollback from the user daemon here
1530 + * but it'll be trivial - without effect - as we don't
1531 + * have a request in our slot to treat.
1535 + } // end while loop
1537 + // PTB we still have the (read) spinlock here
1539 + if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
1540 + read_unlock_irqrestore (&lo->queue_lock, flags);
1542 + NBD_FAIL ("Our slot vaporized while we slept!");
1544 + if (!slot->file) {
1545 + read_unlock_irqrestore (&lo->queue_lock, flags);
1547 + NBD_FAIL ("Our slot nofiled itself while we slept!");
1549 + if (!list_empty (&slot->queue)) {
1550 + read_unlock_irqrestore (&lo->queue_lock, flags);
1552 + NBD_FAIL ("impossible! already treating one request");
1553 + // PTB we do a nontrivial rollback from the user daemon
1556 + // PTB now relinquish the read lock and try for the write lock
1557 + read_unlock_irqrestore (&lo->queue_lock, flags);
1559 + write_lock_irqsave (&lo->queue_lock, flags);
1560 + // PTB got the write lock
1562 + if (list_empty (&lo->queue)) {
1563 + write_unlock_irqrestore (&lo->queue_lock, flags);
1564 + // PTB - somebody else did it while we waited on spinlock. OK
1566 + NBD_FAIL ("ho hum beaten to the punch");
1567 + // PTB we may do a trivial rollback from the user daemon
1570 + // PTB cli/sti here looks unnec. hardware interrupts return here
1571 + // AMARIN begin uninterruptible code
1573 + // PTB we have the (write) spinlock
1575 + // PTB oldest=last element in queue
1576 + req = list_tail (&lo->queue, struct request, queuelist);
1578 + // PTB this is where we free the req from our queue. We need to be
1579 + // holding our spinlock at this point
1581 + // PTB - must succeed as have the spinlock
1582 + result = nbd_remove (lo, req);
1583 + // PTB now holding irqs off in nbd_remove
1585 + // AMARIN end uninterruptible code
1586 + // PTB uh - maybe cli/sti is needed? interrupts can muck the queue?
1587 + // - Nah! I have left them enabled so we can see any errors.
1589 + write_unlock_irqrestore (&lo->queue_lock, flags);
1591 + request.magic = NBD_REQUEST_MAGIC;
1592 + request.flags = 0;
1594 + switch (rq_type (req)) {
1596 + unsigned long cmd;
1602 + request.type = IOCTL;
1604 + // PTB this is our special ioctl kernel request
1606 + cmd = (unsigned long) req->special;
1607 + arg = req->buffer;
1608 + size = req->nr_sectors << 9;
1610 + // PTB the arg was a literal
1613 + // PTB we are in get_req, transferring stored ioctl
1614 + if ((_IOC_DIR (cmd) & _IOC_READ) && size > 0) {
1615 + // PTB if len is +ve we copy to the user buffer later
1616 + request.len = size;
1618 + // PTB we store the weirded ioctl id.
1619 + // PTB Yes, this composition is our private invention.
1620 + request.from = (((__u64) cmd) << 32)
1621 + // PTB really want this to go to a 64 bit request.special
1622 + | ((__u64) (unsigned long) arg);
1628 + request.type = rq_data_dir (req);
1629 + request.from = req->sector;
1630 + request.from <<= 9;
1631 + request.len = req->nr_sectors;
1632 + request.len <<= 9;
1633 + if (atomic_read (&lo->flags) & NBD_MD5SUM) {
1634 + // PTB set the please do md5sum flag on the request
1635 + request.flags |= NBD_REQUEST_MD5SUM;
1643 + request.type = SPECIAL;
1644 + request.len = req->nr_sectors;
1645 + request.len <<= 9;
1646 + request.from = req->sector;
1647 + request.from <<= 9;
1648 + if (rq_data_dir (req) == WRITE)
1649 + request.flags |= NBD_REQUEST_SPECIALRW;
1650 + request.special = (typeof(request.special))req->special;
1654 + NBD_ALERT ("received unknown req %p type %#x\n",
1655 + req, rq_type (req));
1659 + request.seqno = seqno_out->calc(seqno_out, rq_seqno (req));
1662 + * PTB we should here erase the extra seqno info in the request
1663 + * so that on error or on ack the kernel can use the right internal
1664 + * array, but I'll erase it in the ack function instead
1667 + do_gettimeofday (&time);
1668 + request.time = time.tv_sec;
1669 + request.time *= 1000000;
1670 + request.time += time.tv_usec;
1671 + request.zone = sys_tz.tz_minuteswest;
1673 + // PTB tz_dsttime = 0 always in linux
1675 + memcpy (&request.handle, &req, sizeof (request.handle));
1677 + copy_to_user (slot->buffer, (char *) &request, sizeof (request));
1679 + switch (request.type) {
1688 + if (request.len <= 0)
1689 + break; // PTB presumably nothing to do
1690 + arg = (char *) slot->buffer + NBD_BUFFER_DATA_OFFSET;
1691 + copy_to_user (arg, req->buffer, request.len);
1695 + arg = (char *) slot->buffer + NBD_BUFFER_DATA_OFFSET;
1696 + err = copy_to_user_from_req (req, arg, request.len);
1697 + if (err >= request.len)
1699 + // PTB buffer had missing BHSs
1700 + NBD_ERROR ("req %p offered %d bytes of %d for copy to user\n",
1701 + req, result, request.len);
1702 + // PTB this request is badly damaged. We had better shoot it.
1703 + if (req && req->errors == 0) {
1705 + nbd_end_request_lock (req);
1706 + blk_put_request (req);
1708 + NBD_FAIL ("kernel failed to keep req while we copied from it");
1713 + // PTB temporary. We do not treat specials at the moment.
1717 + NBD_ERROR ("req %p was type %#x\n", req, rq_type(req));
1718 + NBD_FAIL ("unknown req type");
1723 + * PTB nbd_accept does not take spinlock and does not need to as
1724 + * the req is already free of the shared queue and only needs
1725 + * to be placed on the unique slot queue.
1728 + nbd_accept (slot, req);
1730 + atomic_dec (&lo->cthreads); // PTB - client thread leaves normally
1731 + slot->flags &= ~NBD_SLOT_RUNNING;
1736 + // PTB accounting - a fail to get a request is not an errored request
1737 + atomic_dec (&lo->cthreads); // PTB - client thread leaves abnormally
1738 + slot->flags &= ~NBD_SLOT_RUNNING;
1739 + result = result < 0 ? result : -ENODEV;
1745 + * PTB error out the pending requests on the kernel queue
1746 + * We have to be called WITHOUT the io request lock held.
1747 + * We sleep in between clearing each request, for "safety".
1749 + * @lo the nbd device to scan
1752 +nbd_clr_kernel_queue (struct nbd_device *lo)
1756 + unsigned long flags;
1757 + request_queue_t *q = lo->q;
1759 + spin_lock_irqsave (q->queue_lock, flags);
1761 + while (! blk_queue_empty(q) && count++ < 1000) {
1762 + struct request *req;
1763 + req = elv_next_request(q);
1764 + if (!req) { // PTB impossible
1765 + spin_unlock_irqrestore (q->queue_lock, flags);
1767 + ("impossible! kernel queue empty after tested nonemty!\n");
1770 + blkdev_dequeue_request (req);
1771 + spin_unlock_irqrestore (q->queue_lock, flags);
1775 + nbd_end_request_lock (req);
1776 + blk_put_request (req);
1777 + spin_lock_irqsave (q->queue_lock, flags);
1779 + spin_unlock_irqrestore (q->queue_lock, flags);
1783 + /* PTB fall thru */
1785 + NBD_ALERT ("removed %d requests\n", count);
1791 + * PTB error out the pending requests on the nbd queue and kernel queue
1792 + * Note that we take the queue spinlock for this
1794 + * @lo the nbd device to scan
1797 +nbd_clr_queue (struct nbd_device *lo)
1801 + while (count < 1000) {
1803 + struct request *req;
1804 + unsigned long req_blks = 1;
1806 + // PTB cannot allow new requests via interrupts
1807 + write_lock (&lo->queue_lock);
1808 + if (list_empty (&lo->queue)) {
1809 + write_unlock(&lo->queue_lock);
1812 + req = list_head (&lo->queue, struct request, queuelist);
1814 + write_unlock(&lo->queue_lock);
1818 + req_blks = nr_blks (req);
1820 + req->errors += req_blks + 1;
1821 + atomic_add (req_blks, &lo->requests_err);
1823 + /* PTB - must succeed as have the spinlock */
1824 + nbd_remove (lo, req);
1825 + /* PTB now hold irqs off in nbd_remove */
1826 + write_unlock(&lo->queue_lock);
1829 + nbd_end_request_lock (req);
1830 + blk_put_request (req);
1833 + NBD_ALERT ("unqueued %d reqs\n", count);
1838 + * PTB do under alt spinlock - we take the lo queue_lock ourselves.
1839 + * We take all requests off the alt queue to which they've been
1840 + * diverted and put them on the devices normal queue, where they will
1841 + * then be treated in the normal course of events. They were diverted
1842 + * to the alt queue after we received a SPECIAL, and they're being
1843 + * released now that we've treated all the extant reqs.
1845 + * @lo the nbd device being treated
1848 +nbd_requeue (struct nbd_device *lo)
1852 + while (count < 1000) {
1854 + struct request *req;
1856 + // PTB cannot allow new requests via interrupts
1857 + if (list_empty (&lo->altqueue)) {
1860 + req = list_tail (&lo->altqueue, struct request, queuelist);
1864 + // PTB heisenbug? without these list_del oopses on null deref
1865 + if (req->queuelist.prev == NULL) {
1866 + NBD_ALERT ("req %p has 0 prev ptr! Abort\n", req);
1869 + if (req->queuelist.next == NULL) {
1870 + NBD_ALERT ("req %p has 0 next ptr! Abort\n", req);
1873 + /* PTB - must succeed as have the spinlock */
1874 + list_del_init (&req->queuelist);
1875 + /* PTB now hold irqs off in nbd_remove */
1878 + nbd_enqueue (lo, req);
1886 +#define NBD_FAIL( s... ) { \
1887 + NBD_ERROR( s); printk("\n"); \
1891 +#ifndef NO_BUFFERED_WRITES
1893 + * Magic function from rd.c that we hope saves a buffer head
1894 + * permanently somewhere in the kernel VM system.
1897 +buffered_write_pagecache_IO (struct buffer_head *sbh, int nbd)
1899 + struct address_space *mapping;
1900 + unsigned long index;
1901 + int offset, size, err;
1902 + struct nbd_device *lo = &nbd_dev[nbd];
1905 + // PTB we need to save the /dev/nda inode
1910 + mapping = lo->inode->i_mapping;
1912 + // PTB index appears to be the page number
1913 + index = sbh->b_rsector >> (PAGE_CACHE_SHIFT - 9);
1914 + // PTB offset is in bytes, and says where in the page the sector starts
1915 + offset = (sbh->b_rsector << 9) & ~PAGE_CACHE_MASK;
1916 + // PTB well, an abbreviation for the buffer size, in bytes
1917 + size = sbh->b_size;
1920 + // PTB we mark each page that we should write to Uptodate
1923 + struct page **hash;
1924 + struct page *page;
1929 + // PTB ummm, how much of the page is left to traverse
1930 + count = PAGE_CACHE_SIZE - offset;
1931 + // PTB reduce it to how much we actually need to traverse
1934 + // PTB say NOW? that we have traversed what we want of the page
1937 + hash = page_hash (mapping, index);
1938 + page = __find_get_page (mapping, index, hash);
1941 + // PTB we get to make a new page
1942 + page = grab_cache_page (mapping, index);
1944 + // PTB failed to get new page
1949 + if (!Page_Uptodate (page)) {
1950 + memset (kmap (page), 0, PAGE_CACHE_SIZE);
1952 + SetPageUptodate (page);
1954 + // PTB the new page is locked. We need to unlock it later
1958 + // PTB prepare already for next page
1961 + // PTB set up for copy
1962 + dst = kmap (page);
1964 + src = bh_kmap (sbh);
1966 + // PTB prepare for next round
1970 + memcpy (dst, src, count);
1976 + UnlockPage (page);
1978 + SetPageDirty (page);
1979 + __free_page (page);
1981 + } while (size > 0);
1988 +buffered_write (struct request *req)
1991 + struct buffer_head *bh;
1992 + int dev = minor (req->rq_dev);
1993 + int nbd = dev >> NBD_SHIFT;
1996 + // PTB go through and copy and protect the written buffers
1997 + for (bh = req->bh; bh; bh = bh->b_reqnext) {
1998 + struct buffer_head *rbh;
2000 + getblk (bh->b_rdev, bh->b_rsector / (bh->b_size >> 9),
2003 + char *bdata = bh_kmap (bh);
2004 + memcpy (rbh->b_data, bdata, rbh->b_size);
2005 + NBD_ALERT ("got new bh sector %lu on write\n",
2009 + mark_buffer_protected (rbh); // PTB equals dirty, uptodate
2010 + err = buffered_write_pagecache_IO (bh, nbd);
2019 +#endif /* NO_BUFFERED_WRITES */
2022 + * PTB check if the device is read only according to int flags
2024 + * @lo the nbd device to be checked
2027 +nbd_read_only(struct nbd_device *lo) {
2028 + return (atomic_read(&lo->flags) & NBD_READ_ONLY) != 0;
2031 + * PTB set the device readonly (or not)
2033 + * @lo the nbd device to be set up
2034 + * @ro 1 for read only, 0 for read write.
2037 +nbd_set_read_only(struct nbd_device * lo, int ro) {
2040 + atomic_set_mask (NBD_READ_ONLY, &lo->flags);
2042 + atomic_clear_mask (NBD_READ_ONLY, &lo->flags);
2045 + // PTB which device really does not matter. We do the checking.
2046 + set_disk_ro (lo->disk, ro != 0);
2050 + * PTB - kernel function to take reqs off the kernel queue. Runs with
2051 + * io lock held. This is the "request function".
2054 +do_nbd_request (request_queue_t * q)
2056 + struct request *req;
2057 + unsigned long flags;
2059 + while (! blk_queue_empty(q)) {
2061 + struct nbd_device *lo;
2063 + req = elv_next_request(q);
2065 + lo = req->rq_disk->private_data;
2067 + /* PTB - one kernel thread enters */
2068 + atomic_inc (&lo->kthreads);
2070 + if (atomic_read (&lo->kthreads) > atomic_read (&lo->kmax))
2071 + atomic_set (&lo->kmax, atomic_read (&lo->kthreads));
2073 + if (!lo->inode || !lo->file) {
2074 + NBD_FAIL ("Request when device not ready.");
2077 + if (rq_data_dir (req) == WRITE && nbd_read_only(lo)) {
2078 + NBD_FAIL ("write on read-only device");
2080 + flags = atomic_read (&lo->flags);
2081 + if (!(flags & NBD_INITIALISED)) {
2082 + NBD_FAIL ("device not initialised.");
2084 + if (!(flags & NBD_ENABLED)) {
2085 + NBD_FAIL ("device not enabled.");
2087 + if (flags & NBD_REMOTE_INVALID) {
2088 + NBD_FAIL ("remote device invalidated.");
2090 + if (req->sector + req->nr_sectors > lo->sectors) {
2091 + NBD_FAIL ("overrange request");
2093 + if (req->sector < 0) {
2094 + NBD_FAIL ("underrange request");
2096 + if (req->rq_disk->major != major) {
2097 + NBD_FAIL ("request for wrong major");
2100 + blkdev_dequeue_request (req);
2102 + // PTB in 2.5 we can release the iolock briefly here
2103 + spin_unlock_irq(q->queue_lock);
2105 + if (req->flags & REQ_SPECIAL) {
2106 + // PTB temporary successful end here for SPECIALS
2108 + // PTB we want to attach it to the device and ack later
2109 + nbd_enqueue (lo, req);
2110 + // PTB block further reqs until these have drained
2111 + write_lock(&lo->altqueue_lock);
2112 + // PTB do not touch this flag without this lock
2113 + if (atomic_read(&lo->countq[READ])
2114 + + atomic_read(&lo->countq[WRITE]) > 0) {
2115 + atomic_set_mask(NBD_QBLOCKED, &lo->flags);
2117 + write_unlock(&lo->altqueue_lock);
2121 + // PTB we are the only reader and writer of lo->seqno
2122 + if (rq_data_dir (req) == WRITE && rq_seqno (req) == 0) {
2123 + // PTB it is a new request never seen before
2124 + struct nbd_seqno * seqno_out = &lo->seqno_out;
2125 + seqno_out->inc(seqno_out);
2127 + * PTB we have to be careful to change this back before
2128 + * giving it back to the kernel, as the kernel uses it.
2129 + * We patch it back again in nbd_end_request.
2131 + rq_set_seqno (req, seqno_out->get(seqno_out));
2134 + // if BLOCK is set divert requests to alt queue
2135 + write_lock(&lo->altqueue_lock);
2136 + if (atomic_read(&lo->flags) & NBD_QBLOCKED) {
2137 + list_add (&req->queuelist, &lo->altqueue);
2138 + write_unlock(&lo->altqueue_lock);
2141 + write_unlock(&lo->altqueue_lock);
2143 + // PTB normal sequence is to queue request locally
2144 + nbd_enqueue (lo, req);
2148 + atomic_dec (&lo->kthreads);
2149 + // PTB regain the iolock for another turn
2150 + spin_lock_irq(q->queue_lock);
2151 + continue; // PTB next request
2154 + // PTB can rely on req being nonnull here
2155 + NBD_ALERT ("ending req %p with prejudice\n", req);
2157 + blkdev_dequeue_request (req);
2158 + spin_unlock_irq(q->queue_lock);
2160 + nbd_end_request_lock (req);
2161 + blk_put_request (req);
2163 + // PTB more accounting
2165 + int req_blks = nr_blks (req);
2166 + atomic_add (req_blks, &lo->requests_err);
2167 + atomic_dec (&lo->kthreads);
2169 + NBD_ALERT("failed to account one orphan errored req\n");
2171 + // PTB regain the queue lock for another turn
2172 + spin_lock_irq(q->queue_lock);
2179 + * PTB pair of helpful additional functions, only good for 1 bit in the
2180 + * mask, however. Modify if you want more.
2182 + * @a the atomic element's address
2183 + * @mask the integer with one bit set in the position that we want to test
2184 + * and set, or clear
2187 +atomic_test_and_set_mask (atomic_t * a, unsigned mask)
2189 + int i = ffs (mask);
2193 + #ifdef __LITTLE_ENDIAN
2194 + return test_and_set_bit (i - 1, (unsigned long *)&a->counter);
2196 + #ifndef __BIGENDIAN
2197 + #error help, I only know about bigendian or littlendian machines
2199 + return test_and_set_bit
2200 + (i - 1 + (sizeof(long)-sizeof(a->counter))*8,
2201 + (unsigned long *)&a->counter);
2205 +atomic_test_and_clear_mask (atomic_t * a, unsigned mask)
2207 + int i = ffs (mask);
2211 + #ifdef __LITTLE_ENDIAN
2212 + return test_and_clear_bit (i - 1, (unsigned long *)&a->counter);
2214 + #ifndef __BIGENDIAN
2215 + #error help, I only know about bigendian or littlendian machines
2217 + return test_and_clear_bit
2218 + (i - 1 + (sizeof(long)-sizeof(a->counter))*8,
2219 + (unsigned long *)&a->counter);
2225 + * PTB - set the enabled flag on a device (call without the spinlock held)
2227 + * @lo the nbd device being treated
2230 +nbd_enable (struct nbd_device *lo) {
2231 + unsigned long flags;
2232 + int did_enabled = 0;
2233 + struct nbd_md *md = &nbd_md;
2235 + // PTB reenable part
2236 + write_lock_irqsave (&lo->meta_lock, flags);
2237 + if (!atomic_test_and_set_mask (&lo->flags, NBD_ENABLED)) {
2238 + // PTB was not enabled before
2239 + atomic_clear_mask (NBD_VALIDATED, &lo->flags);
2243 + write_unlock_irqrestore (&lo->meta_lock, flags);
2246 + md->notify(&nbd_md, mk_kdev (major, lo->nbd << NBD_SHIFT));
2251 + * PTB rollback all requests on a given slot and then invalidate it
2252 + * (so the requests can't go back until somebody reactivates the slot)
2253 + * At least rollback (which we call takes both the io spinlock and our
2254 + * spinlock, so we can hold neither when we are called. Soft_reset
2255 + * (which we call) also calls rollback, so has the same problem.
2257 + * @slot the nbd slot being treated
2260 +nbd_clr_sock (struct nbd_slot *slot)
2263 + struct nbd_device *lo = slot->lo;
2264 + int islot = slot->i;
2265 + unsigned long flags;
2267 + int do_enable = 0;
2268 + static int nbd_soft_reset (struct nbd_device*);
2270 + nbd_rollback_all (slot);
2272 + slot->file = NULL;
2275 + slot->buffer = NULL;
2277 + write_lock_irqsave (&lo->meta_lock, flags);
2279 + /* PTB reset lo->aslot */
2281 + if (lo->aslot > 0) {
2283 + /* PTB grr .. do this the hard way */
2285 + for (i = 0; i < lo->nslot; i++) {
2286 + struct nbd_slot *sloti = &lo->slots[i];
2290 + lo->aslot = aslot;
2292 + if (lo->aslot <= 0) {
2293 + // PTB we were the last client alive, disable device
2294 + if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
2295 + // PTB soft_reset will invalidate_buffers
2296 + atomic_clear_mask (NBD_ENABLED, &lo->flags);
2299 + } else if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
2300 + // PTB must not call reenable as that clears the queue
2306 + // PTB lift the lock temporarily
2307 + write_unlock_irqrestore(&lo->meta_lock, flags);
2309 + nbd_soft_reset (lo);
2313 + NBD_ALERT ("enabled device nd%s\n", lo->devnam);
2315 + write_lock_irqsave(&lo->meta_lock, flags);
2317 + /* PTB reset lo->islot, for no good reason */
2319 + if (atomic_read (&lo->islot) == islot) {
2320 + for (i = 0; i++ < lo->nslot;) {
2321 + atomic_inc (&lo->islot);
2322 + if (atomic_read (&lo->islot) >= lo->nslot)
2323 + atomic_set (&lo->islot, 0);
2324 + if (lo->slots[atomic_read (&lo->islot)].file)
2328 + lo->harderror = 0;
2329 + write_unlock_irqrestore (&lo->meta_lock, flags);
2331 + /* PTB don't clear whole device queue as we might still be open */
2337 + * PTB - check all slots for old requests and roll them back.
2338 + * At least rollback (which we call takes both the io spinlock and our
2339 + * spinlock, so we can hold neither when we are called.
2341 + * @lo the nbd device to scan
2344 +nbd_rollback_old (struct nbd_device *lo)
2349 + for (islot = 0; islot < lo->nslot; islot++) {
2350 + struct nbd_slot *slot = &lo->slots[islot];
2351 + if (slot->req_age > 0
2352 + && slot->req_age < jiffies - lo->req_timeo * HZ) {
2353 + nbd_rollback_all (slot);
2360 + * PTB - register a socket to a slot.
2361 + * - Return 0 for success and -ve for failure.
2362 + * Nowadays this doesn't do very much! Just finalizes things.
2364 + * @slot the nbd slot being registered
2367 +nbd_set_sock (struct nbd_slot *slot, int arg)
2370 + struct nbd_device *lo = slot->lo;
2371 + int islot = slot->i;
2372 + unsigned long flags;
2373 + int do_enable = 0;
2375 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED)) {
2376 + NBD_ALERT ("(%d) device nd%s not initialised yet!\n",
2377 + islot, lo->devnam);
2380 + if (!(atomic_read (&lo->flags) & NBD_SIZED)) {
2381 + NBD_ALERT ("(%d) device nd%s not sized yet!\n", islot,
2385 + if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
2386 + NBD_ALERT ("(%d) device nd%s not blksized yet!\n", islot,
2390 + if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {
2391 + NBD_ALERT ("(%d) setting unsigned device nd%s! But harmless.\n",
2392 + islot, lo->devnam);
2396 + down (&lo->pid_sem);
2398 + if (slot->pid != current->pid) {
2399 + if (jiffies > slot->cli_age + 2 * HZ * lo->req_timeo) {
2401 + ("(%d) dead client process %d has nd%s%d, erasing pid!\n",
2402 + islot, slot->pid, lo->devnam, islot + 1);
2406 + ("(%d) other live client process %d has nd%s%d!\n",
2407 + islot, slot->pid, lo->devnam, islot + 1);
2409 + up (&lo->pid_sem);
2412 + up (&lo->pid_sem);
2414 + slot = &lo->slots[islot];
2416 + // PTB this is a queue critical code region for the flags business
2417 + write_lock_irqsave (&lo->meta_lock, flags);
2419 + // PTB file has to be nonzero to indicate we are all set up.
2420 + slot->file = (void *) (unsigned long) (arg+1 > 0 ? arg+1 : 1);
2422 + if (islot >= lo->nslot) {
2423 + lo->nslot = islot + 1;
2424 + NBD_INFO ("increased socket count to %d\n", lo->nslot);
2427 + lo->harderror = 0;
2429 + if (lo->disk && !get_capacity(lo->disk)) {
2430 + set_capacity(lo->disk, lo->sectors);
2432 + if (++lo->aslot > 0) {
2435 + // PTB end of queue critical region
2436 + write_unlock_irqrestore (&lo->meta_lock, flags);
2439 + * PTB if this is the first slot, we might call reenable and
2440 + * thus clr queue too, but reenable takes the spinlock
2449 + * PTB - return the index i of 2^i + j, 0 <= j < 2^i
2451 +static inline unsigned
2452 +log2 (unsigned arg)
2455 + while ((arg >>= 1) > 0)
2461 + * PTB - set the blksize in bytes of the block device. Return 0 for
2462 + * - success and -ve for failure.
2465 +nbd_set_blksize (struct nbd_device *lo, unsigned int arg)
2467 + int nbd = lo->nbd;
2468 + if (arg > PAGE_SIZE || arg < 512 || (arg & (arg - 1))) {
2469 + NBD_ERROR ("blksize too big (%u)\n", arg);
2472 + lo->blksize = nbd_blksizes[nbd << NBD_SHIFT] = arg;
2473 + lo->logblksize = log2 (lo->blksize);
2474 + set_blocksize(lo->inode->i_bdev, lo->blksize);
2475 + atomic_set_mask (NBD_BLKSIZED, &lo->flags);
2480 + * PTB - set the size in bytes of the block device. Return 0 for
2481 + * - success and -ve for failure.
2484 +nbd_set_size (struct nbd_device *lo, __u64 arg)
2486 + int nbd = lo->nbd;
2487 + lo->bytesize = nbd_bytesizes[nbd << NBD_SHIFT] = arg;
2488 + lo->size = nbd_sizes[nbd << NBD_SHIFT] = arg >> 10;
2489 + lo->sectors = lo->size << 1;
2490 + if (lo->inode && lo->inode->i_bdev && lo->inode->i_bdev->bd_inode)
2491 + lo->inode->i_bdev->bd_inode->i_size = arg;
2493 + set_capacity (lo->disk, arg >> 9);
2494 + atomic_set_mask (NBD_SIZED, &lo->flags);
2500 +nbd_set_intvl (struct nbd_device *lo, int arg)
2503 + NBD_ERROR ("bad pulse interval/req timeout value (%d)\n", arg);
2506 + lo->req_timeo = arg;
2511 +nbd_set_spid (struct nbd_slot *slot, int arg)
2514 + if (arg < 0 || arg >= (1 << (sizeof (short) * 8))) {
2515 + NBD_ERROR ("bad spid value (%d)\n", arg);
2518 + slot->spid = spid;
2523 +nbd_set_bufferwr (struct nbd_device *lo, int arg)
2526 + atomic_set_mask (NBD_BUFFERWR, &lo->flags);
2528 + atomic_clear_mask (NBD_BUFFERWR, &lo->flags);
2534 +nbd_set_remote_invalid (struct nbd_device *lo, int arg)
2537 + * PTB we handle the event ourself exactly when it happens
2538 + * instead of letting the kernel have check_media defined
2539 + * and doing it there (and reporting 0 to the kernel)
2541 + unsigned long flags;
2542 + int do_invalidate = 0;
2543 + kdev_t dev = mk_kdev (major, lo->nbd << NBD_SHIFT);
2546 + atomic_clear_mask (NBD_REMOTE_INVALID, &lo->flags);
2550 + write_lock_irqsave (&lo->meta_lock, flags);
2551 + if (!(atomic_test_and_set_mask (&lo->flags, NBD_REMOTE_INVALID))) {
2553 + * PTB this tells the kernel that next open
2554 + * should cause recheck .. we'll agree not to
2555 + * say we're happy until VALID is set again
2557 + atomic_clear_mask (NBD_VALIDATED, &lo->flags);
2558 + // PTB test removing partitions
2559 + do_invalidate = 1;
2561 + write_unlock_irqrestore (&lo->meta_lock, flags);
2563 + if (do_invalidate) {
2564 + // PTB destroy buffers
2565 + __invalidate_buffers (dev, 1);
2566 + NBD_ALERT ("invalidating remote on nd%s\n", lo->devnam);
2567 + // PTB - clear buffers now instead of waiting for kernel
2568 + // PTB that will cause requests to start being errored
2569 + invalidate_device (dev, 0);
2575 + * Return the first slot index free when asking for n new ones.
2576 + * If there is no such gap, then NBD_MAXCONN will be returned.
2577 + * The return is always in the same argument address.
2580 +nbd_get_nport (struct nbd_device *lo, int *arg)
2582 + int err, nslot, i;
2584 + if (arg == NULL) {
2589 + err = copy_from_user ((char *) &nslot, arg, sizeof (int));
2594 + for (i = 0; i < NBD_MAXCONN; i++) {
2595 + struct nbd_slot *sloti = &lo->slots[i];
2597 + if (sloti->file) {
2601 + for (j = i; j < NBD_MAXCONN && j < i + nslot; j++) {
2605 + if (j == i + nslot) {
2611 + err = copy_to_user (arg, (char *) &i, sizeof (int));
2617 + * PTB - if we're not signed, accept new sig and return success.
2618 + * - if we are signed, compare the offer and return success if equal,
2619 + * - and -ve for failure.
2621 + * @slot the slot we're working on
2622 + * @sig the string of signature chars (accessed as int *)
2625 +nbd_set_sig (struct nbd_slot *slot, int *sig)
2628 + int buf[NBD_SIGLEN / sizeof (int)];
2629 + int islot = slot->i;
2630 + struct nbd_device *lo = slot->lo;
2632 + if (!access_ok (VERIFY_READ, (char *) sig, NBD_SIGLEN)) {
2633 + NBD_ALERT ("(%d): failed sigcheck with bad user address %p\n",
2638 + down (&lo->pid_sem);
2640 + if (slot->pid == 0) {
2641 + slot->pid = current->pid;
2642 + slot->cli_age = jiffies;
2644 + if (slot->pid != current->pid) {
2645 + if (jiffies > slot->cli_age + 2 * HZ * lo->req_timeo) {
2647 + ("(%d): dead process %d was setting sig, erasing pid\n",
2648 + islot, slot->pid);
2652 + ("(%d): live process %d is trying to set sig\n",
2653 + islot, slot->pid);
2655 + up (&lo->pid_sem);
2659 + if (!(atomic_read (&lo->flags) & NBD_SIGNED)) {
2660 + /* PTB first time grab sig */
2661 + copy_from_user ((char *) lo->signature, (char *) &sig[0],
2663 + atomic_set_mask (NBD_SIGNED, &lo->flags);
2664 + up (&lo->pid_sem);
2667 + copy_from_user ((char *) buf, (char *) &sig[0], NBD_SIGLEN);
2669 + /* PTB test for equality */
2671 + if (memcmp (&buf[0], &lo->signature[0], NBD_SIGLEN / sizeof (int))
2674 + up (&lo->pid_sem);
2675 + NBD_ALERT ("(%d): failed sigcheck wth %d\n", islot, err);
2678 + up (&lo->pid_sem);
2684 + * PTB - register a userspace buffer to a slot. Return 0 for success
2685 + * - and -ve for failure. Null arg acts as erase.
2688 +nbd_reg_buf (struct nbd_slot *slot, char *buffer)
2692 + struct nbd_device *lo = slot->lo;
2695 + slot->flags &= ~NBD_SLOT_BUFFERED;
2696 + slot->buffer = NULL;
2701 + siz = lo->max_sectors << 9;
2703 + /* verify the buffer is in the process space */
2704 + if (!access_ok (VERIFY_WRITE, buffer, siz)) {
2708 + /* PTB hope the buffer is as big as it should be - FIXME */
2709 + slot->buffer = buffer;
2710 + slot->bufsiz = siz;
2712 + /* PTB let the device bufsiz be min of registered nonzero bufsizes */
2713 + if (!lo->bufsiz) {
2717 + if (lo->bufsiz > siz)
2721 + // PTB just in case the buffer really is small, we reset all the
2722 + // kernels request maxima if we have to adjust the device max
2723 + if (lo->max_sectors < (lo->bufsiz >> 9)) {
2725 + lo->max_sectors = lo->bufsiz >> 9;
2726 + for (j = 0; j < NBD_MAXCONN; j++) {
2727 + nbd_max_sectors[(lo->nbd << NBD_SHIFT) + j] =
2732 + slot->flags |= NBD_SLOT_BUFFERED;
2737 + * PTB - this unsets the enabled flag on the device and then clears the
2738 + * - queue for the device.. Call without spinlock.
2740 + * @lo the nbd device to scan
2743 +nbd_disable (struct nbd_device *lo)
2745 + struct nbd_md * md = &nbd_md;
2747 + if (!lo || !(atomic_read (&lo->flags) & NBD_INITIALISED)) {
2748 + NBD_ALERT("nbd_disable called on bad device\n");
2752 + if (atomic_test_and_clear_mask (&lo->flags, NBD_ENABLED)) {
2753 + NBD_ALERT ("disabled device nd%s\n", lo->devnam);
2756 + md->unnotify(md, mk_kdev (major, lo->nbd << NBD_SHIFT));
2758 + // PTB have to recheck partitions on next open
2759 + if (atomic_test_and_clear_mask (&lo->flags, NBD_VALIDATED)) {
2760 + NBD_ALERT ("invalidated device nd%s\n", lo->devnam);
2767 + * PTB - reset the enabled flag on a device and then clear all queues
2768 + * ( call without the spinlock held ) and then enable again.
2771 +nbd_reenable (struct nbd_device *lo)
2776 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
2778 + if (lo->aslot <= 0)
2780 + if ((atomic_read (&lo->flags) & NBD_ENABLED))
2783 + m = nbd_clr_queue (lo);
2784 + // PTB - have to call clr_kernel_queue without the io_spinlock held
2785 + n = nbd_clr_kernel_queue (lo);
2791 + * This function launches a thread which wakes for a signal to reenable
2792 + * the device, and then sets the timer to deliver the signal.
2795 +nbd_reenable_delay (struct nbd_device *lo, int delay)
2797 + write_lock (&lo->meta_lock);
2798 + if (lo->reenable_time == 0)
2799 + lo->reenable_time = jiffies + delay * HZ;
2800 + write_unlock (&lo->meta_lock);
2807 + * PTB - drains device queue. Disables device.
2808 + * At least rollback (which we call) takes both the io spinlock and our
2809 + * spinlock, so we can hold neither when we are called. Also
2810 + * invalidate buffers, on request of Rogier Wolff.
2813 +nbd_soft_reset (struct nbd_device *lo)
2816 + const int max_clrq_retries = 100;
2817 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED) || lo->nslot <= 0) {
2821 + * PTB We push back the requests in the slot, in order to be able to
2822 + * vamoosh them in a moment. This is a race, surely? We ought to
2823 + * do this atomically or disable the slots first.
2825 + for (j = 0; j < lo->nslot; j++) {
2826 + struct nbd_slot *slot = &lo->slots[j];
2827 + nbd_rollback_all (slot);
2829 + // PTB disable unsets the enabled flag and clears the queue
2831 + for (j = 0; j < max_clrq_retries; j++) {
2832 + int m = nbd_clr_queue (lo);
2836 + // PTB this would unsign the device: lo->flags &= ~NBD_SIGNED;
2839 + * PTB put back invalidate buffers for use when called from
2840 + * clr_sock from nbd_release on request of Rogier Wolff.
2842 + for (j = 0; j < lo->nslot; j++) {
2843 + invalidate_buffers (mk_kdev(major, (lo->nbd << NBD_SHIFT) + j));
2849 + * PTB - added a device/module reset for tidiness in face of rampant hacking
2850 + * - this does a soft_reset of all devices, followed by a clr sock
2851 + * - on each, and then clears the kernel queue. It unsets the
2852 + * - enabled flag on each device.
2853 + * We have to be called without either the io spinlock or our
2854 + * spinlock held, as we call soft_reset which takes both, as
2858 +nbd_hard_reset (struct nbd_device *lo)
2863 + for (i = 0; i < MAX_NBD; i++) {
2864 + struct nbd_device *lo = &nbd_dev[i];
2866 + if (!lo->file || !lo->inode)
2868 + if (!(atomic_read(&lo->flags)&NBD_INITIALISED))
2870 + nbd_soft_reset (lo);
2871 + for (j = 0; j < lo->nslot; j++) {
2872 + struct nbd_slot *slot = &lo->slots[j];
2873 + // PTB this takes the io spinlock and our spinlock.
2874 + nbd_clr_sock (slot);
2876 + // PTB - call clr_kernel_queue without the io_spinlock held
2877 + nbd_clr_kernel_queue (lo);
2884 +indirect_ioctl_load (struct request *req, int cmd, char * buf)
2889 + struct nbd_ioctl *remote_ioctl = nbd_remote_ioctl.remote;
2891 + if (!remote_ioctl)
2894 + size = remote_ioctl->size_user (cmd, buf);
2897 + // PTB unauthorized ioctl
2903 + // PTB we never use the nbd devices small buffer now
2904 + req->nr_sectors = 0;
2905 + req->buffer = NULL;
2909 + // PTB we have to use an extra buffer or else block
2910 + // here and rendezvous directly with the get_req call
2911 + req->nr_sectors = (size + 511) >> 9;
2912 + req->buffer = kmalloc(req->nr_sectors << 9, GFP_KERNEL);
2914 + if (!req->buffer) {
2919 + if (_IOC_DIR (cmd) & _IOC_WRITE) {
2921 + remote_ioctl->cp_from_user (cmd, req->buffer, buf, size);
2923 + kfree (req->buffer);
2930 + req->buffer = NULL;
2931 + req->nr_sectors =0;
2936 +indirect_ioctl_store (struct request *req, int cmd, char * buf,
2940 + struct nbd_ioctl * remote_ioctl = nbd_remote_ioctl.remote;
2942 + if (!remote_ioctl)
2948 + // PTB if we are reading, it should be to the local buffer
2949 + // PTB the buffer points at a kmalloced area
2953 + err = remote_ioctl->cp_to_user (cmd, buf, req->buffer, size);
2954 + kfree (req->buffer);
2961 +do_nbd_remote_ioctl(struct nbd_device *lo, int minor, int cmd, unsigned long arg) {
2963 + unsigned start_time, timeout;
2966 + struct request * req;
2967 + struct completion x;
2970 + * PTB here we have to treat remote ioctls. We should probably make
2971 + * a request and put it on the local queue, but where can we get
2972 + * the request from? We might have to keep one in reserve.
2973 + * That's not a bad idea, because
2974 + * we generate it here and we delete it here, and the daemon code
2975 + * is all set up to read that sort of thing. So that's what we do ...
2978 + timeout = lo->req_timeo * HZ;
2979 + start_time = jiffies;
2981 + while (!(req = blk_get_request(lo->q,WRITE,0))) {
2982 + if (jiffies >= start_time + timeout) {
2983 + // PTB it takes too long
2985 + ("took too long to get a spare ioctl req: TIMEOUT\n");
2988 + err = interruptible_sleep_on_timeout (&lo->req_wq,
2990 + timeout - jiffies);
2993 + set_rq_type(req, IOCTL);
2997 + // PTB this is the fixed-up command
2998 + req->special = (void *) cmd;
3001 + * PTB this is (arg if it is direct, else) the address of a local buffer
3002 + * PTB we need to store the arg or its dereference somewhere local
3003 + * for a while until the cnb-client thread can enter and pick it
3004 + * up. The alternative is to block the ioctl here until it is
3005 + * picked up, which IS possible.
3008 + if (_IOC_DIR (cmd) & _IOC_READ) {
3010 + size = indirect_ioctl_load (req, cmd, (char *)arg);
3015 + // PTB direct - we just need to remember the value
3017 + req->buffer = (char *) arg;
3020 + // PTB point the request buffer vaguely in the direction of where
3021 + // the data is, but it does not matter.
3022 + req->rq_disk = lo->disk;
3024 + // PTB we queue the request for treatment and wait till treated
3025 + init_completion(&x);
3026 + req->waiting = &x;
3027 + nbd_enqueue (lo, req);
3029 + for (err = 0; err <= 0; err = wait_for_completion_timeout(&x, 1)) {
3032 + * PTB on slot or queue? Don't know. Only want
3033 + * to vamoosh it if its on queue, not slot
3035 + struct list_head *pos;
3036 + int time_left = start_time + timeout - jiffies;
3037 + // PTB del_req will be run with queue_lock held
3038 + static void delete_req(void) {
3040 + // PTB change countq only under this lock
3041 + if (! (req->flags & REQ_SPECIAL)) {
3042 + write_lock(&lo->altqueue_lock);
3043 + // PTB reverse inadvertent accounting in enqueue
3044 + atomic_dec (&lo->countq[rq_data_dir(req)]);
3045 + write_unlock(&lo->altqueue_lock);
3048 + list_del_init (&req->queuelist);
3050 + req->errors = -ETIME;
3051 + if (req->nr_sectors > 0 && req->buffer) {
3052 + kfree (req->buffer);
3053 + req->buffer = NULL;
3057 + if (time_left > 0)
3060 + // PTB find req on list and delete it
3061 + write_lock (&lo->queue_lock);
3062 + list_for_each (pos, &lo->queue) {
3064 + if (req != list_entry (pos, struct request, queuelist))
3068 + write_unlock (&lo->queue_lock);
3070 + ("took too long to treat queued ioctl: TIMEOUT\n");
3074 + write_unlock (&lo->queue_lock);
3076 + } // end while loop
3079 + if (_IOC_DIR (cmd) & _IOC_READ) {
3080 + err = indirect_ioctl_store(req, cmd, (char *)arg, size);
3086 + if (req->errors != 0) {
3087 + err = req->errors;
3088 + err = err < 0 ? err : -EINVAL;
3093 + blk_put_request(req);
3099 +find_slot (struct nbd_device *lo, int pid)
3103 + for (i = 0; i < NBD_MAXCONN; i++) {
3104 + struct nbd_slot * slot = &lo->slots[i];
3105 + if (slot->pid == pid)
3108 + if (i < NBD_MAXCONN)
3109 + return i; // found it
3115 +fixup_slot (struct nbd_device *lo, int islot, unsigned int cmd, unsigned long *arg)
3121 + // PTB get slot info from parameter if not given
3122 + case NBD_CLEAR_SOCK:
3123 + case MY_NBD_CLR_REQ:
3124 + case MY_NBD_ERR_REQ:
3125 + // see if we match a known slot pid
3126 + if (arg && *arg == 0) {
3127 + islot = find_slot (lo, current->pid);
3132 + ("failed to find slot for pid %d for ioctl %x arg %lx\n",
3133 + current->pid, cmd, *arg);
3134 + return islot = -1;
3137 + // PTB get the slot from the 16 high bits
3138 + case NBD_SET_SOCK:
3139 + case MY_NBD_SET_SPID:
3140 + intval = *arg >> ((sizeof (int) - sizeof (short)) * 8);
3141 + intval &= (1 << (sizeof (short) * 8)) - 1;
3142 + if (intval == 0) {
3143 + // no clue in the pid high bits. Search
3144 + islot = find_slot (lo, current->pid);
3146 + // PTB change arg !!
3147 + *arg &= (1 << (sizeof (short) * 8)) - 1;
3148 + return islot; // found it
3153 + ("failed to find slot for pid %d for ioctl %x arg %lx\n",
3154 + current->pid, cmd, *arg);
3155 + return islot = -1;
3158 + case MY_NBD_GET_REQ:
3160 + islot = find_slot (lo, current->pid);
3164 + ("failed to find slot for pid %d for ioctl %x arg %lx\n",
3165 + current->pid, cmd, *arg);
3169 + case MY_NBD_REG_BUF:
3170 + case MY_NBD_SET_SIG:
3171 + islot = find_slot (lo, current->pid);
3175 + * PTB Otherwise they passed a buffer
3176 + * and the slot number is in the first 4B
3177 + * We need some magic here for safety!
3178 + * set sig is the only call that really needs
3179 + * to send its pid!
3183 + if (!arg || !*arg || get_user (intval, (int *) *arg)
3185 + || intval > NBD_MAXCONN) {
3187 + ("failed to find slot for pid %d ioctl %x arg %lx\n",
3188 + current->pid, cmd, *arg);
3189 + return islot = -1;
3191 + islot = intval - 1;
3193 + // PTB CHANGE ARG !!!!
3194 + *arg += sizeof (int);
3199 + return islot = -1;
3203 + * PTB - generic ioctl handling
3206 +nbd_ioctl (struct inode *inode, struct file *file,
3207 + unsigned int cmd, unsigned long arg)
3209 + struct nbd_device *lo
3210 + = NULL; // PTB device pointer
3211 + int minor = -1; // PTB minor on which we got the ioctl
3212 + int islot = -1; // PTB slot number 0, 1, ...
3213 + int nbd = -1; // PTB the count for the device group
3214 + struct nbd_slot *slot
3215 + = NULL; // PTB slot pointer
3218 + if (!capable(CAP_SYS_ADMIN)) {
3219 + NBD_ERROR ("caller must be root.\n");
3223 + NBD_ERROR ("given bad inode.\n");
3226 + if (major (inode->i_rdev) != major) {
3227 + NBD_ERROR ("pseudo-major %d != %d\n",
3228 + major (inode->i_rdev), major);
3231 + minor = minor (inode->i_rdev);
3232 + nbd = minor >> NBD_SHIFT;
3233 + if (nbd >= MAX_NBD) {
3234 + NBD_ERROR ("tried to open too many devices, %d\n", minor);
3237 + lo = &nbd_dev[nbd];
3238 + lo->harderror = 0;
3239 + islot = minor % NBD_MAXCONN - 1;
3242 + * PTB fixup breakage >= 2.5.44 caused by not being allowed to talk to
3243 + * minors. We deduce the slot number from hints in the call.
3244 + * Or we match against the known pids.
3247 + islot = fixup_slot(lo, islot, cmd, &arg);
3250 + slot = & lo->slots[islot];
3253 + // PTB these are all always local ioctls
3259 + case NBD_CLEAR_SOCK:
3261 + NBD_ALERT ("CLEAR_SOCK called on full device nd%s arg %lx\n",
3265 + err = nbd_clr_sock (slot);
3268 + case NBD_SET_SOCK:
3270 + NBD_ALERT ("SET_SOCK called on full device nd%s arg %lx\n",
3274 + err = nbd_set_sock (slot, arg);
3278 + // PTB The kernel should intercept this
3279 + NBD_ALERT ("attempted get_blksize with BLKBSZGET\n");
3282 + case NBD_GET_BLKSIZE:
3283 + if (!(atomic_read (&lo->flags) & NBD_BLKSIZED)) {
3286 + err = put_user (lo->blksize, (long *) arg);
3290 + // PTB The kernel should have intercepted this
3291 + NBD_ALERT ("attempted set_blksize with BLKBSZSET\n");
3294 + case NBD_SET_BLKSIZE:
3298 + if (get_user (intval, (int *)arg))
3300 + if (intval == -1) {
3301 + NBD_ALERT ("BLKBSZSET got %d from user\n", intval);
3303 + err = nbd_set_blksize (lo, intval);
3306 + case NBD_SET_SIZE:
3307 + err = nbd_set_size (lo, (__u64) arg);
3310 + case NBD_SET_SECTORS:
3311 + err = nbd_set_size (lo, ((__u64) arg) << 9);
3314 + case MY_NBD_SET_INTVL: /* WG */
3315 + err = nbd_set_intvl (lo, arg);
3318 + case MY_NBD_SET_SPID:
3320 + NBD_ALERT ("SET_SPID called on full device nd%s\n",
3324 + err = nbd_set_spid (slot, arg);
3327 + case MY_NBD_SET_BUFFERWR:
3328 + err = nbd_set_bufferwr (lo, arg);
3331 + case MY_NBD_REG_BUF: /* PTB register your buffer per socket here */
3333 + /* PTB serves as existence check for this ioctl */
3337 + NBD_ALERT ("REG_BUF called on full device nd%s\n",
3341 + err = nbd_reg_buf (slot, (char *) arg);
3344 + case MY_NBD_SET_SIG:
3346 + NBD_ALERT ("SET_SIG called on full device nd%s\n",
3350 + err = nbd_set_sig (slot, (int *) arg);
3353 + case MY_NBD_GET_REQ:
3355 + NBD_ALERT ("GET_REQ called on full device nd%s\n",
3360 + arg = (unsigned)slot->buffer;
3364 + err = nbd_get_req (slot);
3367 + case MY_NBD_GET_NPORT:
3368 + err = nbd_get_nport (lo, (int *) arg);
3371 + case MY_NBD_CLR_REQ:
3373 + NBD_ALERT ("CLR_REQ called on full device nd%s\n",
3377 + nbd_rollback_all (slot);
3380 + case MY_NBD_ERR_REQ:
3382 + NBD_ALERT ("ERR_REQ called on full device nd%s\n",
3386 + nbd_error_all (slot);
3391 + // PTB maybe run the reenable function
3393 + write_lock(&lo->meta_lock);
3394 + if (lo->reenable_time != 0
3395 + && time_before(lo->reenable_time,jiffies)) {
3396 + lo->reenable_time = 0;
3399 + write_unlock(&lo->meta_lock);
3403 + // PTB error too old reqs if show_errs set, else roll them back
3404 + nbd_rollback_old (lo);
3406 + // PTB opportunity to calculate speed
3407 + nbd_set_speed (lo);
3413 + NBD_ALERT ("NBD_ACK called on full device nd%s\n",
3417 + err = nbd_ack (slot);
3420 + /* let this be compiled in always - it's useful. PTB */
3421 + case NBD_PRINT_DEBUG:
3422 + NBD_INFO("device %d: hd = %p, tl = %p, in = %d, out = %d\n",
3424 + list_head (&lo->queue, struct request, queuelist),
3425 + list_tail (&lo->queue, struct request, queuelist),
3426 + atomic_read (&lo->requests_in[READ]) +
3427 + atomic_read (&lo->requests_in[WRITE]),
3428 + atomic_read (&lo->requests_out[READ]) +
3429 + atomic_read (&lo->requests_out[WRITE])
3433 + case NBD_HARD_RESET: /* PTB - debugging */
3434 + err = nbd_hard_reset (lo);
3437 + case NBD_RESET: /* PTB - debugging */
3438 + err = nbd_soft_reset (lo);
3439 + // PTB we reenable in 5s
3440 + nbd_reenable_delay(lo, 5);
3443 + case NBD_SET_MD5SUM: /* PTB - change to do/plead md5summing */
3445 + atomic_set_mask (NBD_MD5SUM, &lo->flags);
3447 + atomic_clear_mask (NBD_MD5SUM, &lo->flags);
3452 + case MY_NBD_SET_SHOW_ERRS: /* PTB/WG - change show error status */
3454 + atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
3456 + atomic_clear_mask (NBD_SHOW_ERRS, &lo->flags);
3460 + case MY_NBD_SET_DIRECT: /* PTB - change o_direct status */
3462 + atomic_set_mask (NBD_DIRECT, &lo->flags);
3464 + atomic_clear_mask (NBD_DIRECT, &lo->flags);
3468 + case MY_NBD_INVALIDATE:
3469 + err = nbd_set_remote_invalid (lo, (int) arg);
3472 + case NBD_SET_PF_MEMALLOC:
3474 + current->flags |= PF_MEMALLOC;
3476 + current->flags &= ~PF_MEMALLOC;
3481 + // PTB these are the standard ioctls, and we might get them from
3488 + case BLKROSET: /* PTB - change ro status */
3489 + if (get_user(intval, (int*)arg))
3491 + // PTB local flags
3492 + nbd_set_read_only(lo, intval);
3496 + intval = nbd_read_only(lo);
3497 + return put_user(intval, (int*)arg);
3500 + nbd_maybe_sync_sync (lo); // PTB normally fsync_dev
3501 + // PTB device likely has buffers or caches in kernel
3502 + invalidate_buffers (inode->i_rdev);
3503 +#ifndef NO_BUFFERED_WRITES
3504 + if (atomic_read (&lo->flags) & NBD_BUFFERWR) {
3505 + // PTB got this from rd.c
3506 + // PTB destroy buffers
3507 + __invalidate_buffers (inode->i_rdev, 1);
3509 +#endif /* NO_BUFFERED_WRITES */
3516 + struct hd_geometry *geo =
3517 + (struct hd_geometry *) arg;
3518 + int sectors = nbd_sizes[nbd << NBD_SHIFT] << 1;
3520 + unsigned char h, s;
3521 + if (sectors < (1 << 22)) {
3528 + c = (sectors / h) / s;
3531 + if ((err = put_user (c, &geo->cylinders), err < 0)
3532 + || (err = put_user (h, &geo->heads), err < 0)
3533 + || (err = put_user (s, &geo->sectors), err < 0)
3534 + || (err = put_user (h, &geo->start), err < 0)) {
3541 +#define BLKMDNTFY _IOW(0x12,133,sizeof(int))
3544 + NBD_INFO ("received BLKMDNTFY, am now in raid %x\n",
3546 + nbd_md.inc(&nbd_md);
3550 +#define BLKMDUNTFY _IOW(0x12,134,sizeof(int))
3553 + NBD_INFO ("received BLKMDUNTFY, now out of raid %x\n",
3555 + nbd_md.dec(&nbd_md);
3559 +#define BLKMDRGTR _IOW(0x12,135,sizeof(unsigned long))
3562 + nbd_md.reg(&nbd_md, (int(*)(kdev_t, int))arg);
3567 + if (nbd_remote_ioctl.remote != NULL) {
3568 + struct nbd_ioctl *remote_ioctl = nbd_remote_ioctl.remote;
3570 + if (remote_ioctl->convert_inplace (&cmd) < 0) {
3571 + NBD_ALERT ("unauthorized ioctl %#x\n", cmd);
3575 + err = do_nbd_remote_ioctl(lo, minor, cmd, arg);
3582 + * PTB - release the device. This happens when the last process closes
3586 +nbd_release (struct inode *inode, struct file *file)
3588 + struct nbd_device *lo;
3594 + NBD_ALERT ("null inode.\n");
3597 + dev = minor (inode->i_rdev);
3598 + nbd = dev >> NBD_SHIFT;
3600 + if (nbd >= MAX_NBD) {
3602 + NBD_ALERT ("too many open devices.\n");
3606 + lo = &nbd_dev[nbd];
3608 + islot = dev % NBD_MAXCONN - 1;
3610 + // PTB it is a daemon closing the slot?
3611 + if (islot >= 0 || (islot = find_slot(lo, current->pid), islot >= 0)) {
3612 + struct nbd_slot *slot = &lo->slots[islot];
3614 + if (slot->pid == current->pid) {
3616 + nbd_clr_sock (slot);
3617 + NBD_ALERT ("(%d): erasing slot pid %d\n", islot, slot->pid);
3619 + if (slot->refcnt > 0) {
3621 + ("slot owner process %d released slot nd%s%d while not last\n",
3622 + slot->pid, lo->devnam, islot + 1);
3627 + /* POSSIBLE change socket here PTB */
3629 + atomic_dec (&lo->refcnt);
3631 + // PTB invalidate buffers on last close if show_err set
3632 + if (atomic_read (&lo->refcnt) <= 0 || !module_is_live(THIS_MODULE)) {
3633 + struct nbd_seqno * seqno_out = &lo->seqno_out;
3634 + //invalidate_buffers (lo->inode->i_rdev);
3635 + if (atomic_read (&lo->flags) & NBD_SHOW_ERRS) {
3636 + invalidate_buffers (mk_kdev (major, nbd << NBD_SHIFT));
3638 + // PTB in any case the daemons are dead!
3640 + seqno_out->reset(seqno_out);
3644 + && (file->f_flags & O_DIRECT)
3645 + // PTB we set this to show we made iobuf
3646 + && (file->f_flags & O_NOFOLLOW)) {
3647 + file->f_flags &= ~(O_DIRECT|O_NOFOLLOW);
3654 +nbd_media_changed(struct gendisk *disk) {
3655 + struct nbd_device *lo = disk->private_data;
3656 + if (!lo || lo->magic != NBD_DEV_MAGIC)
3658 + NBD_ALERT("nbd_media_changed called on nd%s\n", lo->devnam);
3659 + return (atomic_read (&lo->flags) & NBD_VALIDATED) == 0;
3663 +nbd_revalidate(struct gendisk *disk) {
3664 + struct nbd_device *lo = disk->private_data;
3665 + unsigned long flags;
3666 + int err = -EINVAL;
3668 + if (!lo || lo->magic != NBD_DEV_MAGIC){
3671 + // PTB reenable part
3672 + NBD_ALERT("revalidate called on nd%s\n", lo->devnam);
3673 + write_lock_irqsave (&lo->meta_lock, flags);
3674 + if (! (atomic_read (&lo->flags) & NBD_REMOTE_INVALID)
3675 + && (atomic_read (&lo->flags) & NBD_ENABLED)) {
3676 + atomic_set_mask (NBD_VALIDATED, &lo->flags);
3679 + write_unlock_irqrestore (&lo->meta_lock, flags);
3684 +static struct block_device_operations nbd_blkops = {
3685 + owner: THIS_MODULE,
3687 + release: nbd_release,
3689 + media_changed: nbd_media_changed,
3690 + revalidate_disk: nbd_revalidate,
3693 +static struct gendisk *
3694 +nbd_find (dev_t dev, int *part, void *data)
3696 + struct nbd_device *lo = data;
3699 + if (lo->magic != NBD_DEV_MAGIC)
3704 + NBD_ALERT("nbd_find called with part = %#x\n", (unsigned)*part);
3705 + if (part && (*part < 0 || *part >= NBD_MAXCONN))
3707 + return get_disk (lo->disk);
3712 +nbd_set_disk (struct nbd_device *lo, unsigned first_minor, unsigned npart)
3714 + struct gendisk * disk = lo->disk;
3716 + lo->disk = disk = alloc_disk (npart);
3718 + disk->major = major;
3719 + disk->first_minor = first_minor;
3720 + disk->fops = &nbd_blkops;
3721 + disk->private_data = lo;
3722 + disk->queue = lo->q;
3723 + sprintf (disk->disk_name, "nd%s", lo->devnam);
3724 + // have to set minors (or capacity) to 1 (0) to avoid check disk
3725 + set_capacity (disk, 0);
3727 + blk_register_region(MKDEV(major, first_minor),
3728 + npart, THIS_MODULE, nbd_find, NULL, lo);
3729 + set_capacity (disk, lo->bytesize >> 9);
3730 + // we should rescan later. From userland?
3734 + NBD_ERROR ("Insufficient memory for partition structs\n");
3739 + * Pavel - And here should be modules and kernel interface
3740 + * (Just smiley confuses emacs :-)
3745 +nbd_reset(struct nbd_device *lo, int i) {
3749 + if (i < 0 || i >= MAX_NBD)
3751 + lo->magic = NBD_DEV_MAGIC;
3752 + strncpy (lo->devnam, device_letter (i), 4);
3753 + for (j = 0; j < NBD_MAXCONN; j++) { /* PTB */
3754 + struct nbd_slot *slot = &lo->slots[j];
3757 + INIT_LIST_HEAD (&slot->queue);
3759 + lo->blksize = 1024; /* PTB 132 */
3760 + lo->logblksize = 10; /* PTB */
3761 + lo->bytesize = 0x7fffffff00000; /* PTB 132 */
3762 + lo->size = 0x7fffffff; /* PTB (bytesizes >> 10) */
3763 + lo->sectors = 0xfffffffe; /* PTB sectors */
3765 + lo->req_timeo = NBD_REQ_TIMEO; /* PTB default pulse intvl */
3766 + lo->max_sectors = buf_sectors;
3768 + lo->enable = nbd_enable;
3769 + lo->reset = nbd_reset;
3770 + lo->disable = nbd_disable;
3771 + lo->read_only = nbd_read_only;
3772 + lo->set_speed = nbd_set_speed;
3773 + lo->hard_reset = nbd_hard_reset;
3774 + lo->soft_reset = nbd_soft_reset;
3775 + lo->reenable_delay = nbd_reenable_delay;
3777 + INIT_LIST_HEAD (&lo->queue);
3778 + INIT_LIST_HEAD (&lo->altqueue);
3779 + init_waitqueue_head (&lo->wq);
3780 + init_waitqueue_head (&lo->req_wq);
3781 + init_MUTEX(&lo->pid_sem);
3782 + rwlock_init (&lo->queue_lock);
3783 + rwlock_init (&lo->altqueue_lock);
3784 + rwlock_init (&lo->meta_lock);
3785 + for (j = 0; j < NBD_MAXCONN; j++) {
3786 + nbd_blksizes[i * NBD_MAXCONN + j] = lo->blksize;
3787 + nbd_bytesizes[i * NBD_MAXCONN + j] = lo->bytesize;
3788 + nbd_sizes[i * NBD_MAXCONN + j] = lo->size;
3789 + nbd_max_sectors[i * NBD_MAXCONN + j] = lo->max_sectors;
3791 + nbd_init_seqno(&lo->seqno_out);
3792 + nbd_init_speed(&lo->rspeed);
3793 + nbd_init_speed(&lo->wspeed);
3794 + nbd_init_speed(&lo->tspeed);
3796 + // PTB queue has already been initialized, or will be
3797 + lo->q = nbd_queue;
3800 + atomic_set_mask (NBD_MD5SUM, &lo->flags);
3803 + atomic_set_mask (NBD_SYNC, &lo->flags);
3806 + atomic_set_mask (NBD_SHOW_ERRS, &lo->flags);
3809 + atomic_set_mask (NBD_DIRECT, &lo->flags);
3811 + if (buffer_writes) {
3812 + atomic_set_mask (NBD_BUFFERWR, &lo->flags);
3814 + if (merge_requests) {
3815 + atomic_set(&lo->merge_requests, merge_requests);
3820 +MODULE_AUTHOR ("Peter T. Breuer, Andres Marin");
3821 +MODULE_DESCRIPTION ("Enhanced Network Block Device " NBD_VERSION);
3822 +MODULE_LICENSE ("GPL");
3823 +#endif /* MODULE */
3825 +// PTB we steal these from the queue struct at init
3826 +static merge_requests_fn *ll_merge_requests_fn;
3827 +static merge_request_fn *ll_front_merge_fn;
3828 +static merge_request_fn *ll_back_merge_fn;
3831 + * These functions are needed when the kernel does request merging in
3832 + * order to stop it making requests that are bigger than our buffer.
3834 + * To turn OFF merging (once these functions are in place), set
3835 + * merge_requests=0.
3838 +nbd_merge_requests_fn (request_queue_t * q, struct request *req,
3839 + struct request *req2)
3841 + struct nbd_device *lo = req->rq_disk->private_data;
3843 + if (!atomic_read(&lo->merge_requests))
3846 + if (!ll_merge_requests_fn)
3849 + if (req->nr_sectors + req2->nr_sectors > lo->max_sectors)
3852 + if (req->nr_sectors + req2->nr_sectors >
3853 + ((atomic_read(&lo->merge_requests) + 1) << (lo->logblksize - 9)))
3856 + return ll_merge_requests_fn (q, req, req2);
3859 +nbd_front_merge_fn (request_queue_t * q, struct request *req, struct bio * bio)
3861 + struct nbd_device *lo = req->rq_disk->private_data;
3863 + if (!atomic_read(&lo->merge_requests))
3866 + if (!ll_front_merge_fn)
3869 + if (req->nr_sectors > lo->max_sectors)
3872 + if (req->nr_sectors > ((atomic_read(&lo->merge_requests) + 1) << (lo->logblksize - 9)))
3875 + return ll_front_merge_fn (q, req, bio);
3878 +nbd_back_merge_fn (request_queue_t * q, struct request *req,
3881 + struct nbd_device *lo = req->rq_disk->private_data;
3883 + if (!atomic_read(&lo->merge_requests))
3886 + if (!ll_back_merge_fn)
3889 + if (req->nr_sectors > lo->max_sectors)
3892 + if (req->nr_sectors >
3893 + ((atomic_read(&lo->merge_requests) + 1) << (lo->logblksize - 9))) return 0;
3895 + return ll_back_merge_fn (q, req, bio);
3898 +// PTB - and now to play with the sysctl interface ...
3899 +static struct ctl_table_header *nbd_table_header;
3900 +// the above was set by the register call of the root table
3901 +static ctl_table nbd_table[] = {
3903 + &rahead, sizeof (int), 0644, NULL, &proc_dointvec},
3905 + &plug, sizeof (int), 0644, NULL, &proc_dointvec},
3907 + &sync_intvl, sizeof (int), 0644, NULL, &proc_dointvec},
3908 + {4, "merge_requests",
3909 + &merge_requests, sizeof (int), 0644, NULL, &proc_dointvec},
3911 + &md5sum, sizeof (int), 0644, NULL, &proc_dointvec},
3912 + {8, "md5_on_threshold",
3913 + &md5_on_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
3914 + {9, "md5_off_threshold",
3915 + &md5_off_threshold, sizeof (int), 0644, NULL, &proc_dointvec},
3918 +static ctl_table nbd_dir_table[] = {
3919 + {6, "enbd", NULL, 0, 0555, nbd_table},
3922 +static ctl_table nbd_root_table[] = {
3923 + {CTL_DEV, "dev", NULL, 0, 0555, nbd_dir_table},
3927 +#ifdef CONFIG_DEVFS_FS
3928 +static devfs_handle_t devfs_handle;
3929 +static devfs_handle_t devfs_handles[MAX_NBD];
3938 + struct proc_dir_entry *res;
3940 + NBD_INFO ("Network Block Device originally by pavel@elf.mj.gts.cz\n");
3941 + NBD_INFO ("Network Block Device port to 2.0 by ptb@it.uc3m.es\n");
3942 + NBD_INFO ("Network Block Device move networking to user space by "
3943 + "amarin@it.uc3m.es\n");
3944 + NBD_INFO ("Enhanced Network Block Device " NBD_VERSION " by "
3945 + "ptb@it.uc3m.es\n");
3947 + nbd_queue = kmalloc(sizeof(*nbd_queue), GFP_KERNEL);
3951 + for (i = 0; i < MAX_NBD; i++) {
3952 + struct nbd_device *lo = &nbd_dev[i];
3953 + struct gendisk *disk = alloc_disk(NBD_MAXCONN);
3954 + memset (lo, 0, sizeof (*lo));
3959 + if (register_blkdev (major, "nbd", &nbd_blkops)) {
3960 + NBD_ERROR ("Unable to register major number %d for NBD\n",
3965 + NBD_INFO ("registered device at major %d\n", major);
3969 +// PTB - set up kernel queue struct with default methods
3970 + blk_init_queue (nbd_queue, do_nbd_request, &nbd_lock);
3972 + blk_queue_max_sectors(nbd_queue, buf_sectors); /* max per request */
3975 + * PTB - I think that put:
3976 + * - q->plug_device_fn = generic_plug_device (static ll_rw_blk)
3977 + * - q->plug_tq.routine = generic_unplug_device (static ll_rw_blk)
3978 + * - q->back_merge_fn = ll_back_merge_fn (static ll_rw_blk)
3979 + * - q->front_merge_fn = ll_front_merge_fn (static ll_rw_blk)
3980 + * - q->merge_requests_fn = ll_merge_requests_fn (static ll_rw_blk)
3981 + * - q->request_fn = do_nbd_request (param)
3985 + * PTB - we have to do some more init magic in 2.4.*. This says that we
3986 + * - take all stuff off the kernel queue before processing it, so in
3987 + * - particular it is OK for kernel to do merges with the queue head.
3988 + * blk_queue_headactive (nbd_queue, 0);
3992 + * LA - moved the next #if higher;
3993 + * - kernel 2.2.* doesn't know about plug_device_fn
3996 + // PTB control merge attempts so we do not overflow our buffer
3997 + ll_merge_requests_fn = nbd_queue->merge_requests_fn;
3998 + ll_front_merge_fn = nbd_queue->front_merge_fn;
3999 + ll_back_merge_fn = nbd_queue->back_merge_fn;
4001 +// JSA - Add this line because under >=2.4.1, merge optimizations are in flux
4003 + * PTB - however it's not this which does damage, I believe. Data: plugging
4004 + * - simply has to be enabled in these kernels. Without it, requests just
4005 + * - sit on the kernel queue and never come off and into our request_fn.
4006 + * PTB - commented the ifdef again after talks with Jens Axboe.
4007 + * - Apparently plug_fn will disappear in 2.4.4 and merge functions are
4008 + * the only way to control merges, so they MUST be included.
4012 + * PTB - The functions below just impose our own stricter size limit before
4013 + * - calling the defaults if all seems OK sizewise.
4015 + nbd_queue->merge_requests_fn = &nbd_merge_requests_fn;
4016 + nbd_queue->front_merge_fn = &nbd_front_merge_fn;
4017 + nbd_queue->back_merge_fn = &nbd_back_merge_fn;
4019 + nbd_init_md(&nbd_md);
4020 + nbd_init_ioctl_stub(&nbd_remote_ioctl);
4022 + for (i = 0; i < MAX_NBD; i++) {
4023 + struct nbd_device *lo = &nbd_dev[i];
4028 + * PTB we do the disk and partition stuff after we have
4029 + * contact, when nbd_open is called for the first time?
4032 + res = create_proc_read_entry ("nbdinfo", 0, NULL, NULL, NULL);
4034 + NBD_ALERT ("creation of proc entry failed\n");
4037 + // PTB additional write_proc entry in struct
4038 + nbd_init_proc(res);
4040 + // PTB make the gendisk structs very late.
4041 + for (i = 0; i < MAX_NBD; i++) {
4042 + struct nbd_device *lo = &nbd_dev[i];
4043 + nbd_set_disk(lo, i * NBD_MAXCONN, NBD_MAXCONN);
4046 +#ifdef CONFIG_DEVFS_FS
4048 + devfs_handle = devfs_mk_dir (NULL, "nd", NULL);
4049 + if (devfs_handle) {
4050 + for (i = 0; i < MAX_NBD; i++) {
4051 + struct nbd_device *lo = &nbd_dev[i];
4053 + // PTB make the subdirectory "a","b" etc.
4054 + devfs_handles[i] =
4055 + devfs_mk_dir (devfs_handle, lo->devnam, NULL);
4056 + // PTB add the blk specials, "0","1" to NBD_MAXCONN-1
4057 + if (!devfs_handles[i])
4059 + for (j = 0; j < MAX_NBD; j++) {
4061 + sprintf (name, "%u", j);
4062 + devfs_register(devfs_handles[i], name,
4064 + major, i * NBD_MAXCONN + j,
4065 + S_IFBLK | S_IRUSR | S_IWUSR,
4066 + &nbd_blkops, NULL);
4068 + // PTB do the whole disk symlink ..
4069 + devfs_mk_symlink (devfs_handles[i], "disk",
4070 + DEVFS_FL_DEFAULT, "0",
4072 + // PTB .. and the channel symlinks
4073 + for (j = 1; j < MAX_NBD; j++) {
4076 + sprintf (link, "%u", j);
4077 + sprintf (name, "chan%u", j);
4078 + devfs_mk_symlink (devfs_handles[i],
4081 + link, NULL, NULL);
4085 +#endif /* CONFIG_DEVFS_FS */
4087 + // PTB - sysctl interface
4088 + nbd_table_header = register_sysctl_table (nbd_root_table, 1);
4090 + // PTB we have to wait for the open to complete init with inode val
4100 + for (i = 0; i < MAX_NBD; i++) {
4102 + struct nbd_device *lo = &nbd_dev[i];
4105 + if (!(atomic_read (&lo->flags) & NBD_INITIALISED))
4108 + NBD_INFO ("invalidating buffers on device nd%s%d-%d\n",
4109 + lo->devnam, 0, NBD_MAXCONN);
4111 + for (j = 0; j < NBD_MAXCONN; j++) {
4112 + int minor = i * NBD_MAXCONN + j;
4113 + invalidate_buffers (mk_kdev (major, minor));
4116 + NBD_INFO ("destroying buffers on device nd%s%d-%d\n",
4117 + lo->devnam, 0, NBD_MAXCONN);
4119 + for (j = 0; j < NBD_MAXCONN; j++) {
4120 + int minor = i * NBD_MAXCONN + j;
4121 + __invalidate_buffers (mk_kdev (major, minor), 1);
4125 + unregister_sysctl_table (nbd_table_header);
4127 +#ifdef CONFIG_DEVFS_FS
4128 + if (devfs_handle) {
4129 + for (i = 0; i < MAX_NBD; i++) {
4131 + if (!devfs_handles[i])
4133 + for (j = 0; j < NBD_MAXCONN; j++) {
4139 + s[1] = '0' + (j - 10);
4142 + devfs_remove("nd/%s/%u", device_letter(i), j);
4144 + devfs_remove("nd/%s/disk", device_letter(i));
4146 + devfs_remove("nd/%s/chan%u",device_letter(i),j);
4149 + devfs_remove("nd/%s", device_letter(i));
4151 + devfs_remove("nd");
4155 + remove_proc_entry ("nbdinfo", &proc_root);
4157 + for (i = 0; i < MAX_NBD; i++) {
4158 + struct nbd_device *lo = &nbd_dev[i];
4159 + atomic_clear_mask (NBD_ENABLED, &lo->flags);
4161 + del_gendisk(lo->disk);
4162 + put_disk(lo->disk);
4164 + if (lo->blockmap) {
4165 + kfree (lo->blockmap);
4166 + lo->blockmap = NULL;
4168 + nbd_sync_sync (lo);
4171 + blk_cleanup_queue (nbd_queue);
4173 + if (unregister_blkdev (major, "nbd") != 0) {
4174 + NBD_ALERT ("cleanup_module failed\n");
4176 + NBD_INFO ("module cleaned up.\n");
4182 +module_init (nbd_init);
4183 +module_exit (nbd_cleanup);
4185 +EXPORT_SYMBOL(nbd_remote_ioctl);
4189 + * gcc -O2 -D__KERNEL__ -DMODULE -DEXPORT_SYMTAB -xc -c enbd.c -o enbd.o
4191 + * (possibly with -DMODVERSIONS also). PTB
4192 + * (possibly with -I/usr/src/linux-x.y.z/include also). PTB
4194 --- linux-2.5.64/drivers/block/enbd/enbd_ioctl.c.pre-enbd Mon Mar 24 18:55:25 2003
4195 +++ linux-2.5.64/drivers/block/enbd/enbd_ioctl.c Tue Mar 25 15:45:03 2003
4198 +#include <sys/time.h>
4199 +#include <unistd.h>
4202 +#include <linux/ioctl.h>
4203 +#include <linux/fs.h>
4204 +#include <linux/fd.h>
4206 +#define caddr_t char*
4208 +#include <linux/cdrom.h>
4209 +#include <asm/uaccess.h>
4210 +#include <linux/module.h>
4211 +#include <linux/version.h>
4212 +#include <linux/init.h>
4213 +#ifndef KERNEL_VERSION
4214 +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))
4216 +#include <linux/blk.h>
4217 +#include <linux/enbd.h>
4218 +#include <linux/enbd_ioctl.h>
4222 + * This is the whitelist of remote ioctls - an entry here tells the
4223 + * driver that it's OK to send this ioctl out over the net, because we
4224 + * have the right info on it.
4226 + * "The right info" is what is on the right hand side of the table (a 0
4227 + * stands for a repetition of the LHS info). We have to fixup something
4228 + * that a lot of kernel authors forgot to do or got wrong - namely
4229 + * declare their ioctls in a way that conveys information about their
4230 + * intended mode of use (see ioctl.h in the kernel sources).
4232 + * We need all ioctls to be declared as either
4234 + * _IO(class,id) -- default. Means no args. The call is enough.
4235 + * _IOW(class,id,type) -- we write a value to kernel that is sizeof(type)
4236 + * _IOR(class,id,type) -- we read a value from kernel sizeof(type)
4237 + * _IOWR(class,id,type) -- ibid, but both ways
4239 + * The "R" bit is crucial because it lets us know that the data is
4240 + * _indirected_. I.e. it's an address of somewhere in userspace where
4241 + * we want to read data from or write data to.
4243 + * The "type" part should be the type of the indirected argument, NOT
4244 + * the type of its address!
4246 + * Kernel authors typically make two mistakes:
4248 + * 1) they write _IO instead of _IOR or IOWR, and hence forget the
4249 + * type info. Well, not telling me if the argument data is
4250 + * directly or indirectly accessible was already bad enough!
4251 + * 2) they get the type argument _wrong_ when they do remember to
4252 + * put it. They write "int *" instead of "int", for example,
4253 + * when the argument to the ioctl is a pointer to an integer.
4254 + * OK, so it's a natural mistake to make! But in that case the
4255 + * argument should be "int" so that the kernel macro picks up
4256 + * sizeof(int) instead of sizeof(int*).
4258 + * Those "errors" have to be repaired via this table. Wrong at left,
4259 + * corrected at right. A 0 for the new entry indicates that the old
4260 + * was alright. If there isn't an entry, the ioctl won't be treated.
4261 + * If the size info works out at the max for the field (2^14 - 1)
4262 + * then an extra table is consulted for size and copy methods.
4267 + * PTB the space before the final comma is important as the ##
4268 + * discards the preceding token when D is empty
4270 +#define _NEW_IO_(B,C,D...) C(_IOC_TYPE(B), _IOC_NR(B) , ## D)
4271 +#define _NEW_IO(B,D...) _IO(_IOC_TYPE(B), _IOC_NR(B) , ## D)
4272 +#define _NEW_IOW(B,D...) _IOW(_IOC_TYPE(B), _IOC_NR(B) , ## D)
4273 +#define _NEW_IOR(B,D...) _IOR(_IOC_TYPE(B), _IOC_NR(B) , ## D)
4274 +#define _NEW_IOWR(B,D...) _IOWR(_IOC_TYPE(B), _IOC_NR(B) , ## D)
4275 +#define _NEW_IORS(B) _IOC(_IOC_READ,_IOC_TYPE(B), _IOC_NR(B), _IOC_SIZEMASK)
4276 +#define _NEW_IOWRS(B) _IOC(_IOC_READ|_IOC_WRITE,_IOC_TYPE(B), _IOC_NR(B), _IOC_SIZEMASK)
4278 +static struct ioctl_conv ioctl_conv_tab[] = {
4280 + { BLKROSET, _NEW_IOW(BLKROSET,int), },
4281 + { BLKROGET, _NEW_IOR(BLKROGET,int), },
4282 +//#define BLKRRPART _IO(0x12,95) /* re-read partition table */
4283 + { BLKRRPART, 0, },
4284 + { BLKGETSIZE, _NEW_IOR(BLKGETSIZE,int), },
4285 +//#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
4286 + { BLKFLSBUF, 0, },
4287 + { BLKRASET, _NEW_IOW(BLKRASET,int), },
4288 + { BLKRAGET, _NEW_IOR(BLKRAGET,int), },
4289 + { BLKFRASET, _NEW_IOW(BLKFRASET,int), },
4290 + { BLKFRAGET, _NEW_IOR(BLKFRAGET,int), },
4291 + { BLKSECTSET, _NEW_IOW(BLKSECTSET,int), },
4292 + { BLKSECTGET, _NEW_IOR(BLKSECTGET,int), },
4293 + { BLKSSZGET, _NEW_IOR(BLKSSZGET,int), },
4296 + { FDSETPRM, _NEW_IOWR(FDSETPRM, struct floppy_struct), },
4297 + { FDDEFPRM, _NEW_IOWR(FDDEFPRM, struct floppy_struct), },
4298 + { FDGETPRM, _NEW_IOR(FDGETPRM, struct floppy_struct), },
4302 + { FDFMTTRK, _NEW_IOWR(FDFMTTRK, struct format_descr), },
4304 + { FDSETEMSGTRESH, _NEW_IOW(FDSETEMSGTRESH, unsigned), },
4306 + { FDSETMAXERRS, _NEW_IOWR(FDSETMAXERRS, struct floppy_max_errors), },
4307 + { FDGETMAXERRS, _NEW_IOR(FDGETMAXERRS, struct floppy_max_errors), },
4308 + { FDGETDRVTYP, _NEW_IOR(FDGETDRVTYP, floppy_drive_name), }, // 16 bytes
4309 + { FDSETDRVPRM, _NEW_IOWR(FDSETDRVPRM, struct floppy_drive_params), },
4310 + { FDGETDRVPRM, _NEW_IOR(FDGETDRVPRM, struct floppy_drive_params), },
4311 + { FDGETDRVSTAT, _NEW_IOR(FDGETDRVSTAT, struct floppy_drive_struct), },
4312 + { FDPOLLDRVSTAT, _NEW_IOR(FDPOLLDRVSTAT, struct floppy_drive_struct), },
4314 + { FDGETFDCSTAT, _NEW_IOR(FDGETFDCSTAT, struct floppy_fdc_state), },
4315 + { FDWERRORCLR, 0, },
4316 + { FDWERRORGET, _NEW_IOR(FDWERRORGET, struct floppy_write_errors), },
4317 + { FDRAWCMD, _NEW_IOWR(FDRAWCMD, struct floppy_raw_cmd[1]) }, // FIXME linked list
4318 + { FDTWADDLE, 0, },
4321 + { CDROMPAUSE, _NEW_IO(CDROMPAUSE), },
4322 + { CDROMRESUME, _NEW_IO(CDROMRESUME), },
4323 + { CDROMPLAYMSF, _NEW_IOR(CDROMPLAYMSF, struct cdrom_msf), },
4324 + { CDROMPLAYTRKIND, _NEW_IOR(CDROMPLAYTRKIND, struct cdrom_ti), },
4325 + { CDROMREADTOCHDR, _NEW_IOWR(CDROMREADTOCHDR, struct cdrom_tochdr), },
4326 + { CDROMREADTOCENTRY, _NEW_IOWR(CDROMREADTOCENTRY, struct cdrom_tocentry), },
4327 + { CDROMSTOP, _NEW_IO(CDROMSTOP), },
4328 + { CDROMSTART, _NEW_IO(CDROMSTART), },
4329 + { CDROMEJECT, _NEW_IO(CDROMEJECT), },
4330 + { CDROMVOLCTRL, _NEW_IOR(CDROMVOLCTRL, struct cdrom_volctrl), },
4331 + { CDROMSUBCHNL, _NEW_IOWR(CDROMSUBCHNL, struct cdrom_subchnl), },
4332 + { CDROMREADMODE2, _NEW_IOR(CDROMREADMODE2, struct cdrom_read), }, // INDIRECT 2336B
4333 + { CDROMREADMODE1, _NEW_IOR(CDROMREADMODE1, struct cdrom_read), }, // INDIRECT 2048B
4334 + { CDROMREADAUDIO, _NEW_IOR(CDROMREADAUDIO, struct cdrom_read_audio), },
4335 + { CDROMEJECT_SW, _NEW_IO(CDROMEJECT_SW), },
4336 + { CDROMMULTISESSION, _NEW_IOWR(CDROMMULTISESSION, struct cdrom_multisession), },
4337 + { CDROM_GET_MCN, _NEW_IOWR(CDROM_GET_MCN, struct cdrom_mcn), },
4338 + { CDROMRESET, _NEW_IO(CDROMRESET), },
4339 + { CDROMVOLREAD, _NEW_IOWR(CDROMVOLREAD, struct cdrom_volctrl), },
4340 + { CDROMREADRAW, _NEW_IOR(CDROMREADRAW, struct cdrom_read), }, // INDIRECT 2352B
4341 + // aztcd.c optcd.c
4342 + { CDROMREADCOOKED, _NEW_IOR(CDROMREADCOOKED, struct cdrom_msf), }, // INDIRECT FIXME
4343 + { CDROMSEEK, _NEW_IOR(CDROMSEEK, struct cdrom_msf), },
4345 + { CDROMPLAYBLK, _NEW_IOWR(CDROMPLAYBLK, struct cdrom_blk), },
4347 + { CDROMREADALL, _NEW_IOR(CDROMREADALL, char[2646]), },
4349 + { CDROMGETSPINDOWN, _NEW_IOWR(CDROMGETSPINDOWN, char), }, // one byte
4350 + { CDROMSETSPINDOWN, _NEW_IOWR(CDROMSETSPINDOWN, char), }, // one byte
4352 + { CDROMCLOSETRAY, _NEW_IO(CDROMCLOSETRAY), },
4353 + { CDROM_SET_OPTIONS, _NEW_IOW(CDROM_SET_OPTIONS, int), },
4354 + { CDROM_CLEAR_OPTIONS, _NEW_IOW(CDROM_CLEAR_OPTIONS, int), },
4355 + { CDROM_SELECT_SPEED, _NEW_IOW(CDROM_SELECT_SPEED, int), }, // FIXME
4356 + { CDROM_SELECT_DISC, _NEW_IOW(CDROM_SELECT_DISC, int), },
4357 + { CDROM_MEDIA_CHANGED, _NEW_IOW(CDROM_MEDIA_CHANGED, int), },
4358 + { CDROM_DRIVE_STATUS, _NEW_IOW(CDROM_DRIVE_STATUS, int), },
4359 + { CDROM_DISC_STATUS, _NEW_IO(CDROM_DISC_STATUS), },
4360 + { CDROM_CHANGER_NSLOTS, _NEW_IO(CDROM_CHANGER_NSLOTS), },
4361 + { CDROM_LOCKDOOR, _NEW_IOW(CDROM_LOCKDOOR, int), },
4362 + { CDROM_DEBUG, _NEW_IOW(CDROM_DEBUG, int), },
4363 + { CDROM_GET_CAPABILITY, _NEW_IO(CDROM_GET_CAPABILITY), },
4365 + { CDROMAUDIOBUFSIZ, _NEW_IOW(CDROMAUDIOBUFSIZ, int), },
4367 + { DVD_READ_STRUCT, _NEW_IOR(DVD_READ_STRUCT, dvd_struct), },
4368 + { DVD_WRITE_STRUCT, _NEW_IOWR(DVD_WRITE_STRUCT, dvd_struct), },
4369 + { DVD_AUTH, _NEW_IOWR(DVD_AUTH, dvd_authinfo), },
4370 + { CDROM_SEND_PACKET, _NEW_IOR(CDROM_SEND_PACKET, struct cdrom_generic_command), },
4371 + { CDROM_NEXT_WRITABLE, _NEW_IOWR(CDROM_NEXT_WRITABLE, long), },
4372 + { CDROM_LAST_WRITTEN, _NEW_IOWR(CDROM_LAST_WRITTEN, long), },
4373 + // PTB local test ioctls
4374 + { NBD_TEST_IOCTL1, 0, }, // write an int
4375 + { NBD_TEST_IOCTL2, 0, }, // read an int
4376 + { NBD_TEST_IOCTL3, 0, }, // write and read an int
4377 + { NBD_TEST_IOCTL4, 0, }, // read 256B
4378 + { NBD_TEST_IOCTL5, 0, }, // r/w 256B
4379 + { NBD_TEST_IOCTL6, _NEW_IORS(NBD_TEST_IOCTL6), }, // read special
4380 + { NBD_TEST_IOCTL7, _NEW_IORS(NBD_TEST_IOCTL7), }, // r/w special
4381 + // PTB we must terminate with a 0,0 entry.
4386 + * This should be the table of special methods for certain ioctls.
4387 + * The "new" code is the real index. It will have a size count of
4388 + * _IOC_SIZEMASK but the rest of it should be meaningful. The size is
4389 + * gotten by dynamic lookup using the size() function.
4391 +static struct ioctl_special ioctl_special_tab[] = {
4392 + // PTB last entry must be all zeros
4393 + { 0, NULL, NULL, NULL, NULL, },
4397 +static struct ioctl_conv *
4398 +ioctl_lookup_old (int ioctl)
4404 + for (i = 0; old = ioctl_conv_tab[i].old, old; i++) {
4406 + return &ioctl_conv_tab[i];
4413 +nbd_ioctl_convert (int ioctl)
4415 + struct ioctl_conv *conv = ioctl_lookup_old (ioctl);
4419 + return conv->new ? : ioctl;
4423 +nbd_ioctl_convert_inplace(int *ioctl) {
4428 + new_ioctl = nbd_ioctl_convert(*ioctl);
4429 + if (new_ioctl == -1)
4431 + *ioctl = new_ioctl;
4435 +static struct ioctl_conv *
4436 +ioctl_lookup_new (int ioctl)
4440 + for (i = 0; old = ioctl_conv_tab[i].old, old; i++) {
4441 + unsigned new = ioctl_conv_tab[i].new;
4442 + if (new == ioctl || (new == 0 && old == ioctl))
4443 + return &ioctl_conv_tab[i];
4450 +nbd_ioctl_revert (int ioctl)
4452 + struct ioctl_conv *conv = ioctl_lookup_new (ioctl);
4459 +static struct ioctl_special *
4460 +ioctl_special_lookup_new (int ioctl)
4464 + for (i = 0; new = ioctl_special_tab[i].new, new; i++) {
4466 + return &ioctl_special_tab[i];
4473 +nbd_ioctl_size (int cmd, char *arg)
4475 + int size = _IOC_SIZE (cmd);
4476 + if (size == _IOC_SIZEMASK) {
4477 + // PTB special handling required.
4478 + struct ioctl_special *special = ioctl_special_lookup_new(cmd);
4481 + return special->size (arg);
4487 +nbd_ioctl_size_user (int cmd, char *arg)
4489 + int size = _IOC_SIZE (cmd);
4490 + if (size == _IOC_SIZEMASK) {
4491 + // PTB special handling required.
4492 + struct ioctl_special *special = ioctl_special_lookup_new(cmd);
4495 + return special->size_user (arg);
4503 +nbd_ioctl_copy_to_user (int cmd, char *arg, char *buf, int size)
4506 + if (_IOC_SIZE (cmd) == _IOC_SIZEMASK) {
4507 + struct ioctl_special *special = ioctl_special_lookup_new(cmd);
4510 + return special->ioctl_copy_to_user (arg, buf, size);
4513 + if (_IOC_DIR (cmd) & _IOC_READ) {
4515 + copy_to_user (arg, buf, size);
4525 +nbd_ioctl_copy_from_user (int cmd, char *buf, char *arg, int size)
4528 + if (_IOC_SIZE (cmd) == _IOC_SIZEMASK) {
4529 + struct ioctl_special *special = ioctl_special_lookup_new(cmd);
4532 + return special->ioctl_copy_from_user (buf, arg, size);
4535 + if (_IOC_DIR (cmd) & _IOC_READ) {
4537 + copy_from_user (buf, arg, size);
4542 + if (size > sizeof (arg)) {
4546 + memcpy (buf, &arg, size);
4550 +static struct nbd_ioctl struct_ioctl = {
4551 + convert : nbd_ioctl_convert,
4552 + convert_inplace : nbd_ioctl_convert_inplace,
4553 + revert : nbd_ioctl_revert,
4554 + size : nbd_ioctl_size,
4555 + size_user : nbd_ioctl_size_user,
4556 + cp_to_user : nbd_ioctl_copy_to_user,
4557 + cp_from_user : nbd_ioctl_copy_from_user,
4561 +nbd_ioctl_init (void)
4563 + struct nbd_ioctl_stub * remote_ioctl = &nbd_remote_ioctl;
4564 + remote_ioctl->reg(remote_ioctl, &struct_ioctl);
4569 +nbd_ioctl_cleanup (void) {
4570 + struct nbd_ioctl_stub * remote_ioctl = &nbd_remote_ioctl;
4571 + remote_ioctl->unreg(remote_ioctl, &struct_ioctl);
4574 +module_init (nbd_ioctl_init);
4575 +module_exit (nbd_ioctl_cleanup);
4577 +int linux_version_code = LINUX_VERSION_CODE;
4580 + #if LINUX_VERSION_CODE > KERNEL_VERSION(2,1,0)
4581 + MODULE_AUTHOR ("Peter T. Breuer");
4582 + MODULE_DESCRIPTION ("Enhanced Network Block Device Remote Ioctl");
4583 + #ifdef MODULE_LICENSE
4584 + MODULE_LICENSE("GPL");
4587 +#endif /* MODULE */
4589 +#endif /* __KERNEL__ */
4594 +int ioctl_init(struct ioctl_conv *self, int old, int new) {
4597 + self->serialize = ioctl_serialize;
4598 + self->deserialize = ioctl_deserialize;
4599 + self->size = ioctl_size;
4602 --- linux-2.5.64/drivers/block/enbd/enbd_ioctl_stub.c.pre-enbd Mon Mar 24 18:55:25 2003
4603 +++ linux-2.5.64/drivers/block/enbd/enbd_ioctl_stub.c Mon Mar 24 22:51:59 2003
4606 +#include <linux/stddef.h>
4607 +#include <linux/errno.h>
4608 +#include <linux/enbd.h>
4609 +#include <linux/enbd_ioctl.h>
4612 + * PTB this is the hook for the enbd_ioctl extra module
4614 + static int register_remote_ioctl(struct nbd_ioctl_stub *remote_ioctl, struct nbd_ioctl *x) {
4615 + if (!remote_ioctl->remote) {
4616 + remote_ioctl->remote = x;
4621 + static int unregister_remote_ioctl(struct nbd_ioctl_stub *remote_ioctl, struct nbd_ioctl *x) {
4622 + if (remote_ioctl->remote != x)
4624 + remote_ioctl->remote = NULL;
4628 +int nbd_init_ioctl_stub(struct nbd_ioctl_stub *remote_ioctl) {
4629 + memset(remote_ioctl, 0, sizeof(*remote_ioctl));
4630 + remote_ioctl->reg = register_remote_ioctl;
4631 + remote_ioctl->unreg = unregister_remote_ioctl;
4635 --- linux-2.5.64/drivers/block/enbd/enbd_md.c.pre-enbd Mon Mar 24 18:55:25 2003
4636 +++ linux-2.5.64/drivers/block/enbd/enbd_md.c Mon Mar 24 22:51:59 2003
4638 +#include <linux/enbd.h>
4639 +#include <linux/enbd_ioctl.h>
4642 + * PTB small driver wide support database for MDRGTR ioctl
4647 +#ifndef HOT_ADD_DISK
4648 + #define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
4650 +#ifndef SET_DISK_FAULTY
4651 + #define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
4655 +nbd_md_dec (struct nbd_md *md) {
4656 + down(&md->access_sem);
4657 + if (--md->count <= 0)
4658 + md->notify_fn = NULL;
4659 + up(&md->access_sem);
4662 +nbd_md_inc (struct nbd_md *md) {
4663 + down(&md->access_sem);
4665 + up(&md->access_sem);
4668 +nbd_md_reg (struct nbd_md *md, int(*fn)(kdev_t, int)) {
4669 + down(&md->access_sem);
4670 + if (!md->notify_fn) {
4671 + md->notify_fn = fn;
4674 + up(&md->access_sem);
4678 + * PTB tell md devices in which we are embedded that we are alright
4680 + * @lo the nbd device to tell them about
4683 +nbd_notify_md_devices (struct nbd_md *md, kdev_t nbd_dev)
4685 + //kdev_t nbd_dev = mk_kdev (major, nbd << NBD_SHIFT);
4688 + down (&md->access_sem);
4689 + if (md->count > 0 && md->notify_fn) {
4690 + NBD_ALERT ("adding %x:%x to raid devices via fn\n",
4691 + major(nbd_dev), minor(nbd_dev));
4692 + err = md->notify_fn (nbd_dev, HOT_ADD_DISK);
4694 + NBD_ALERT ("HOT_ADD to raid devices returned %d\n",
4698 + up (&md->access_sem);
4704 +nbd_unnotify_md_devices (struct nbd_md *md, kdev_t nbd_dev)
4707 + // kdev_t nbd_dev = mk_kdev (major, lo->nbd << NBD_SHIFT);
4710 + down (&md->access_sem);
4711 + if (md->count > 0 && md->notify_fn) {
4712 + NBD_ALERT ("erasing %x:%x from raid devices via fn\n",
4713 + major(nbd_dev), minor(nbd_dev));
4714 + err = md->notify_fn (nbd_dev, SET_DISK_FAULTY);
4717 + ("SETFAULTY to raid devices returned %d\n", err);
4720 + up (&md->access_sem);
4725 +nbd_init_md(struct nbd_md *md)
4727 + md->notify_fn = NULL;
4729 + init_MUTEX(&md->access_sem);
4730 + md->notify = nbd_notify_md_devices;
4731 + md->unnotify = nbd_unnotify_md_devices;
4732 + md->dec = nbd_md_dec;
4733 + md->inc = nbd_md_inc;
4734 + md->reg = nbd_md_reg;
4737 --- linux-2.5.64/drivers/block/enbd/enbd_seqno.c.pre-enbd Mon Mar 24 18:55:25 2003
4738 +++ linux-2.5.64/drivers/block/enbd/enbd_seqno.c Mon Mar 24 23:28:41 2003
4740 +#include <linux/enbd.h>
4743 +#define _NBD_GENERATION (8*sizeof(int) - __REQ_NBDSEQNO)
4745 + * PTB increment the device's seqno
4747 + * @lo the nbd device to increment the seqno of
4750 +seqno_inc(struct nbd_seqno *nseqno)
4752 + if (nseqno->seqno < (1 << _NBD_GENERATION)) {
4756 + // PTB next generation !
4757 + nseqno->seqno = 0;
4758 + atomic_inc (&nseqno->seqno_gen);
4761 +seqno_get (struct nbd_seqno *nseqno)
4763 + return nseqno->seqno;
4766 +seqno_reset (struct nbd_seqno *nseqno)
4768 + nseqno->seqno = 0;
4769 + atomic_set(&nseqno->seqno_gen,0);
4772 + * PTB convert a seqno number into one with an extra generation number
4773 + * in the msb, so that it can be compared with others. return the
4776 + * We add the current generation no. to small seqnos, and we add the
4777 + * previous generation no. to large seqnos.
4779 + * @lo the nbd device to look at
4780 + * @seqno the small sequence number to return the full seq number for
4782 +static unsigned int
4783 +seqno_calc (struct nbd_seqno *nseqno, unsigned int seqno)
4785 + unsigned int genno;
4786 + static unsigned int absdiff(unsigned int x, unsigned int y) {
4793 + genno = atomic_read (&nseqno->seqno_gen);
4794 + if (absdiff(seqno,nseqno->seqno) < (1 << (_NBD_GENERATION - 1))) {
4795 + return seqno + (genno << _NBD_GENERATION);
4797 + if (seqno < nseqno->seqno) {
4798 + return seqno + ((genno + 1) << _NBD_GENERATION);
4800 + return seqno + ((genno - 1) << _NBD_GENERATION);
4803 +void nbd_init_seqno (struct nbd_seqno *nseqno) {
4805 + seqno_reset(nseqno);
4807 + nseqno->inc = seqno_inc;
4808 + nseqno->get = seqno_get;
4809 + nseqno->reset = seqno_reset;
4810 + nseqno->calc = seqno_calc;
4815 --- linux-2.5.64/drivers/block/enbd/enbd_speed.c.pre-enbd Mon Mar 24 18:55:25 2003
4816 +++ linux-2.5.64/drivers/block/enbd/enbd_speed.c Mon Mar 24 23:28:50 2003
4818 +#include <linux/enbd.h>
4819 +#include <linux/enbd_ioctl.h>
4822 + * PTB - update speed counters (if at least 5s has passed)
4824 + * @spd the speed struct to update
4827 +spd_update (struct nbd_speed *spd, int distance)
4830 + // last time we measured
4831 + int lastjiffy = atomic_read (&spd->jiffy);
4832 + // jiffies since last time
4833 + int djiffy = jiffies - lastjiffy;
4835 + // previous no we measured
4836 + int lastdist = atomic_read (&spd->distance);
4837 + // blocks since last time
4838 + int ddistance = distance - lastdist;
4840 + // write every 5 second in time
4841 + if (djiffy > 5 * HZ) {
4843 + // jiffies since first time
4844 + int tjiffy = jiffies - atomic_read (&spd->frstj);
4846 + // max tot speed measured so far
4847 + int speedmax = atomic_read (&spd->speedmax);
4849 + // last instantaneous speed we measured
4850 + int lastspeed = atomic_read (&spd->speed);
4852 + // instantaneous read blocks/s
4853 + int speed = djiffy ? (ddistance * HZ) / djiffy : 0;
4856 + int speedsmoothed =
4857 + (djiffy * speed + HZ * lastspeed) / (djiffy + HZ);
4859 + // average speed to now in KB/s
4860 + int speedav = tjiffy ? (distance * HZ) / tjiffy : 0;
4862 + // smoothing count for max
4864 + (speedav > speedsmoothed) ? speedav : speedsmoothed;
4867 + atomic_set (&spd->speed, speedsmoothed);
4868 + if (speedhi > speedmax)
4869 + atomic_set (&spd->speedmax, speedhi);
4870 + atomic_set (&spd->distance, distance);
4871 + atomic_set (&spd->speedav, speedav);
4872 + atomic_set (&spd->jiffy, jiffies);
4877 +nbd_init_speed(struct nbd_speed *spd) {
4878 + memset(spd, 0, sizeof(*spd));
4879 + spd->update = spd_update;
4882 --- linux-2.5.64/drivers/block/enbd/enbd_proc.c.pre-enbd Tue Mar 25 15:16:33 2003
4883 +++ linux-2.5.64/drivers/block/enbd/enbd_proc.c Mon Mar 24 22:52:00 2003
4886 +#include <linux/enbd.h>
4887 +#include <linux/proc_fs.h>
4889 +extern struct nbd_device * nbd_get(int i);
4892 +do_reset (int reset, int i) {
4894 + void do_r (void) {
4895 + struct nbd_device *lo = nbd_get(i);
4897 + lo->reset (lo, i);
4901 + if (i >= 0 && i < MAX_NBD) {
4908 + * PTB This is just to get a nice limited width integer printout in proc!
4909 + * use endpos (<= 8) spaces at most. We serve from a static buffer size 16.
4912 +display (unsigned n, int endpos)
4914 + // PTB use endpos (<= 8) spaces at most
4915 + static char buf[16];
4918 + int decpos = endpos;
4919 + int wholepart = n, fractionpart = 0;
4920 + buf[endpos--] = 0;
4921 + // PTB find the right units to display. U or K or M or G.
4922 + while (n >= 1 << 10) {
4923 + decimals = n & ((1 << 10) - 1);
4931 + buf[endpos--] = 'K';
4934 + buf[endpos--] = 'M';
4937 + buf[endpos--] = 'G';
4940 + buf[endpos--] = 'T';
4943 + // after this wholepart = n && fractionpart = decimals
4944 + fractionpart = wholepart & ((1 << (units * 10)) - 1);
4945 + wholepart >>= units * 10;
4946 + // PTB write the whole digits (something between 0 and 1023 inclusive)
4948 + buf[endpos--] = '0';
4950 + while (endpos >= 0 && n > 0) {
4951 + buf[endpos--] = '0' + n % 10;
4955 + // PTB if there is space and cause, add decimal digits
4956 + if (endpos >= 1 && units > 0) {
4958 + char unitchar = buf[--decpos];
4959 + buf[decpos + k++] = '.';
4960 + while (endpos >= k) {
4961 + int digit = (decimals * 10) >> 10;
4962 + buf[decpos + k++] = '0' + digit;
4963 + decimals -= (digit << 10) / 10;
4966 + buf[decpos + k++] = unitchar;
4967 + buf[decpos + k] = 0;
4969 + // PTB report the start position
4970 + return buf + endpos + 1;
4975 +set_generic (int x, int i, int X)
4977 + void set_x (void) {
4978 + struct nbd_device *lo = nbd_get(i);
4979 + if (lo->magic != NBD_DEV_MAGIC)
4982 + atomic_set_mask (X, &lo->flags);
4985 + atomic_clear_mask (X, &lo->flags);
4988 + if (i >= 0 && i < MAX_NBD) {
4992 + for (i = 0; i < MAX_NBD; i++) {
4998 +set_sync_intvl (int sync_intvl, int i)
5000 + set_generic(sync_intvl, i, NBD_SYNC);
5005 +set_show_errs (int show_errs, int i)
5007 + set_generic(show_errs, i, NBD_SHOW_ERRS);
5011 +set_md5sum (int md5sum, int i)
5013 + set_generic(md5sum, i, NBD_MD5SUM);
5018 +set_enable (int enable, int i)
5020 + void set_e (void) {
5021 + struct nbd_device *lo = nbd_get(i);
5022 + if (!lo || lo->magic != NBD_DEV_MAGIC)
5024 + if (enable != 0) {
5025 + if (!(atomic_read (&lo->flags) & NBD_ENABLED)) {
5033 + if (i >= 0 && i < MAX_NBD) {
5037 + for (i = 0; i < MAX_NBD; i++) {
5043 +set_direct (int direct, int i)
5045 + set_generic(direct, i, NBD_DIRECT);
5048 +#ifndef NO_BUFFERED_WRITES
5050 +set_buffer_writes (int buffer_writes, int i)
5052 + set_generic(buffer_writes, i, NBD_BUFFERWR);
5057 +set_merge_requests (int mr, int i)
5059 + void set_mr (void) {
5060 + struct nbd_device *lo = nbd_get(i);
5061 + if (lo->magic != NBD_DEV_MAGIC)
5063 + atomic_set (&lo->merge_requests, mr);
5066 + if (i >= 0 && i < MAX_NBD) {
5070 + for (i = 0; i < MAX_NBD; i++) {
5076 +nbd_read_proc (char *buf, char **start, off_t offset, int len, int *eof,
5081 +#define MIN(x,y) ((x)<(y)?(x):(y))
5084 + const int limit = MIN (PAGE_SIZE, len) - 80;
5086 + struct nbd_device *lo;
5088 + static void *next_label;
5089 + static char *next_label_name;
5091 + unsigned long flags;
5093 + if (offset > 0 && !next_label) {
5099 + if (offset <= 0) {
5100 + // PTB do static inits first time through
5103 + next_label = NULL;
5104 + next_label_name = NULL;
5108 + // PTB start this bytecount
5111 +#define NBD_PROC_LABEL(n) \
5112 + next_label = &&label_##n; \
5113 + next_label_name = "label_" #n; \
5114 + if (len > limit) { \
5115 + *start = (char *) (unsigned long) len; \
5121 + for ( /* static init */ ; i < MAX_NBD; i++) {
5126 + devnam = lo->devnam;
5127 + if (lo->nslot <= 0) {
5128 + next_label = NULL;
5132 + // PTB computed goto next not-done
5134 + void *label = next_label;
5135 + next_label = NULL;
5136 + next_label_name = NULL;
5141 + NBD_PROC_LABEL (1);
5143 + if (last == i - 2) {
5144 + struct nbd_device * lo = nbd_get (i - 1);
5145 + char *prevdevnam = lo->devnam;
5147 + sprintf (buf + len, "Device %s:\tClosed\n",
5150 + if (last < i - 2) {
5151 + struct nbd_device * llo = nbd_get (last + 1);
5152 + struct nbd_device * plo = nbd_get (i - 1);
5153 + char lastdevnam[3];
5154 + char prevdevnam[3];
5155 + strncpy (lastdevnam, llo->devnam, 3);
5156 + strncpy (prevdevnam, plo->devnam, 3);
5158 + sprintf (buf + len, "Device %s-%s:\tClosed\n",
5159 + lastdevnam, prevdevnam);
5162 + NBD_PROC_LABEL (2);
5165 + sprintf (buf + len, "Device %s:\tOpen " "\n", devnam);
5167 + NBD_PROC_LABEL (3);
5169 + len += sprintf (buf + len,
5170 + "[%s] State:\t%s%s%s%s%s%s%s%s%s%s%s%slast error %d, lives %d, bp %d\n",
5171 + devnam, atomic_read (&lo->flags)
5172 + & NBD_INITIALISED ? "" : "uninitialized, ",
5173 + atomic_read (&lo->flags)
5174 + & NBD_WRITE_NOCHK ? "noverify, " :
5175 + "verify, ", lo->read_only(lo) ? "ro, " : "rw, ",
5176 + atomic_read(&lo->merge_requests) ? "merge requests, " : "",
5177 +#ifndef NO_BUFFERED_WRITES
5178 + atomic_read (&lo->flags)
5179 + & NBD_BUFFERWR ? "buffer writes, " : "",
5182 +#endif /* NO_BUFFERED_WRITES */
5183 + atomic_read (&lo->flags)
5184 + & NBD_ENABLED ? "enabled, " : "disabled, ",
5185 + atomic_read (&lo->flags)
5186 + & NBD_VALIDATED ? "validated, " : "",
5187 + atomic_read (&lo->flags)
5188 + & NBD_REMOTE_INVALID ? "remote invalid, " : "",
5189 + atomic_read (&lo->flags)
5190 + & NBD_SHOW_ERRS ? "show_errs, " : "",
5191 + atomic_read (&lo->flags)
5192 + & NBD_DIRECT ? "direct, " : "",
5193 + atomic_read (&lo->flags)
5194 + & NBD_SYNC ? "sync, " : "",
5195 + atomic_read (&lo->flags)
5196 + & NBD_MD5SUM ? "md5sum, " : "",
5199 + ((atomic_read (&lo->flags) & NBD_ENABLED) ?
5200 + 1 : 0), 0 //atomic_read(&buffermem_pages)
5203 + NBD_PROC_LABEL (4);
5205 + do { // PTB begin long do once block
5206 + int countq[2] = { 0, 0 };
5209 + struct list_head *pos;
5211 + read_lock_irqsave (&lo->queue_lock, flags);
5213 + list_for_each (pos, &lo->queue) {
5214 + struct request *req =
5215 + list_entry (pos, struct request, queuelist);
5216 + if (countq[READ] + countq[WRITE] > 1000)
5219 + cmd = rq_data_dir (req);
5223 + read_unlock_irqrestore (&lo->queue_lock, flags);
5225 + len += sprintf (buf + len,
5226 + "[%s] Queued:\t+%dR/%dW curr (check %dR/%dW) +%dR/%dW max\n",
5228 + atomic_read (&lo->countq[READ]),
5229 + atomic_read (&lo->countq[WRITE]),
5230 + countq[READ], countq[WRITE],
5231 + atomic_read (&lo->maxq[READ]),
5232 + atomic_read (&lo->maxq[WRITE]));
5233 + } while (0); // PTB end long do once block
5235 + NBD_PROC_LABEL (5);
5237 + len += sprintf (buf + len,
5238 + "[%s] Buffersize:\t%d\t(sectors=%d, blocks=%d)\n",
5239 + devnam, lo->bufsiz, lo->max_sectors,
5240 + lo->max_sectors / (lo->blksize >> 9));
5242 + sprintf (buf + len, "[%s] Blocksize:\t%d\t(log=%d)\n",
5243 + devnam, lo->blksize, lo->logblksize);
5245 + sprintf (buf + len, "[%s] Size:\t%luKB\n", devnam,
5246 + (unsigned long) (lo->bytesize >> 10));
5248 + sprintf (buf + len, "[%s] Blocks:\t%u\n", devnam,
5249 + lo->size >> (lo->logblksize - 10));
5251 + NBD_PROC_LABEL (6);
5254 + sprintf (buf + len, "[%s] Sockets:\t%d", devnam,
5257 + NBD_PROC_LABEL (7);
5259 + do { // PTB begin short do once block
5261 + for (j = 0; j < lo->nslot; j++) {
5262 + struct nbd_slot *slotj = &lo->slots[j];
5263 + if (j != atomic_read (&lo->islot)) {
5265 + sprintf (buf + len, "\t(%s)",
5266 + slotj->file ? "+" : "-");
5269 + sprintf (buf + len, "\t(%s)",
5270 + slotj->file ? "*" : ".");
5273 + } while (0); // PTB end short do once block
5275 + len += sprintf (buf + len, "\n");
5277 + NBD_PROC_LABEL (8);
5279 + len += sprintf (buf + len, "[%s] Requested:\t%s", devnam,
5280 + display (atomic_read
5281 + (&lo->requests_in[READ]) +
5282 + atomic_read (&lo->requests_in
5285 + NBD_PROC_LABEL (9);
5287 + do { // PTB begin short do once block
5290 + for (j = 0; j < lo->nslot; j++) {
5291 + struct nbd_slot *slotj = &lo->slots[j];
5293 + sprintf (buf + len, "\t(%s)",
5294 + display (slotj->in, 5));
5297 + display (atomic_read
5298 + (&lo->requests_in[READ]), 6), 7);
5300 + display (atomic_read
5301 + (&lo->requests_in[WRITE]), 6),
5304 + sprintf (buf + len, "\t%sR/%sW", buff[0],
5306 + lo->set_speed (lo);
5307 + len += sprintf (buf + len, "\tmax %d",
5308 + atomic_read (&lo->maxreqblks));
5309 + } while (0); // PTB end short do once block
5311 + len += sprintf (buf + len, "\n");
5312 + len += sprintf (buf + len, "[%s] Despatched:\t%s", devnam,
5313 + display (atomic_read
5314 + (&lo->requests_out[READ]) +
5315 + atomic_read (&lo->requests_out
5318 + NBD_PROC_LABEL (10);
5320 + do { // PTB begin short do once block
5323 + for (j = 0; j < lo->nslot; j++) {
5324 + struct nbd_slot *slotj = &lo->slots[j];
5326 + sprintf (buf + len, "\t(%s)",
5327 + display (slotj->out, 5));
5330 + display (atomic_read
5331 + (&lo->requests_out[READ]), 6),
5334 + display (atomic_read
5335 + (&lo->requests_out[WRITE]), 6),
5338 + sprintf (buf + len, "\t%sR/%sW", buff[0],
5341 + sprintf (buf + len, "\tmd5 %sW",
5342 + display (atomic_read
5343 + (&lo->wrequests_5to), 5));
5345 + sprintf (buf + len, " (%s eq,",
5346 + display (atomic_read
5347 + (&lo->wrequests_5so), 5));
5349 + sprintf (buf + len, " %s ne,",
5350 + display (atomic_read
5351 + (&lo->wrequests_5wo), 5));
5353 + sprintf (buf + len, " %s dn)",
5354 + display (atomic_read
5355 + (&lo->wrequests_5eo), 5));
5356 + } while (0); // PTB end short do once block
5358 + len += sprintf (buf + len, "\n");
5359 + len += sprintf (buf + len, "[%s] Errored:\t%s", devnam,
5360 + display (atomic_read (&lo->requests_err),
5363 + NBD_PROC_LABEL (11);
5365 + do { // PTB begin short do once block
5370 + for (j = 0; j < lo->nslot; j++) {
5371 + struct nbd_slot *slotj = &lo->slots[j];
5373 + sprintf (buf + len, "\t(%s)",
5374 + display (slotj->err, 5));
5375 + toterrs += slotj->err;
5377 + strncpy (buff[0], display (toterrs, 6), 7);
5379 + display (atomic_read (&lo->requests_err) -
5382 + sprintf (buf + len, "\t%s+%s\n", buff[0],
5384 + } while (0); // PTB end short do once block
5386 + NBD_PROC_LABEL (12);
5388 + do { // PTB begin long do once block
5389 + int pending_rblks = 0; /* PTB reads not reached the slots yet */
5390 + int pending_wblks = 0; /* PTB writes not reached the slots yet */
5393 + read_lock_irqsave (&lo->queue_lock, flags);
5395 + do { // PTB begin short do once block
5396 + struct list_head *pos;
5399 + struct request *req;
5401 + list_for_each (pos, &lo->queue) {
5403 + list_entry (pos, struct request,
5405 + if (count++ > 1000)
5407 + blks = req->nr_sectors / lo->blksize;
5409 + switch (rq_data_dir (req)) {
5421 + } while (0); // PTB end short do once block
5423 + read_unlock_irqrestore (&lo->queue_lock, flags);
5425 + sprintf (buf + len, "[%s] Pending:\t%d", devnam,
5426 + atomic_read (&lo->requests_req[READ]) +
5427 + atomic_read (&lo->requests_req[WRITE]));
5429 + do { // PTB begin short do once block
5431 + for (j = 0; j < lo->nslot; j++) {
5432 + struct nbd_slot *slotj =
5435 + sprintf (buf + len, "\t(%d)",
5438 + } while (0); // PTB end short do once block
5440 + len += sprintf (buf + len,
5441 + "\t%dR/%dW+%dR/%dW\n",
5442 + atomic_read (&lo->requests_req[READ]),
5443 + atomic_read (&lo->requests_req[WRITE]),
5444 + pending_rblks, pending_wblks);
5446 + } while (0); // PTB end long do once block
5448 + NBD_PROC_LABEL (13);
5450 + do { // PTB begin long do once block
5452 + int shift = lo->logblksize;
5455 + display (atomic_read (&lo->wspeed.speed)
5458 + display (atomic_read (&lo->wspeed.speedav)
5461 + display (atomic_read
5462 + (&lo->wspeed.speedmax) << shift,
5466 + display (atomic_read (&lo->rspeed.speed)
5469 + display (atomic_read (&lo->rspeed.speedav)
5472 + display (atomic_read
5473 + (&lo->rspeed.speedmax) << shift,
5477 + display (atomic_read (&lo->tspeed.speed)
5480 + display (atomic_read (&lo->tspeed.speedav)
5483 + display (atomic_read
5484 + (&lo->tspeed.speedmax) << shift,
5488 + sprintf (buf + len, "[%s] B/s now:", devnam);
5490 + sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[6],
5491 + buff[3], buff[0]);
5493 + sprintf (buf + len, "[%s] B/s ave:", devnam);
5495 + sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[7],
5496 + buff[4], buff[1]);
5498 + sprintf (buf + len, "[%s] B/s max:", devnam);
5500 + sprintf (buf + len, "\t%s\t(%sR+%sW)\n", buff[8],
5501 + buff[5], buff[2]);
5502 + } while (0); // PTB end long do once block
5504 + do { // PTB begin short do once block
5509 + sprintf (buf + len, "[%s] Spectrum:", devnam);
5511 + blks <= atomic_read (&lo->maxreqblks); blks++) {
5513 + atomic_read (&lo->req_in[READ][blks]) +
5514 + atomic_read (&lo->req_in[WRITE][blks]);
5518 + blks <= atomic_read (&lo->maxreqblks); blks++) {
5520 + atomic_read (&lo->req_in[READ][blks])
5521 + + atomic_read (&lo->req_in[WRITE][blks]);
5524 + 0 ? (100 * req_blks) / tot_reqs : 0;
5528 + sprintf (buf + len, "\t%u%%%d", percent,
5531 + len += sprintf (buf + len, "\n");
5532 + } while (0); // PTB end short do once block
5534 + NBD_PROC_LABEL (14);
5536 + len += sprintf (buf + len, "[%s] Kthreads:\t%d", devnam,
5537 + atomic_read (&lo->kthreads));
5539 + sprintf (buf + len, "\t(%d waiting/%d running/%d max)\n",
5540 + atomic_read (&lo->kwaiters),
5541 + atomic_read (&lo->kthreads) -
5542 + atomic_read (&lo->kwaiters),
5543 + atomic_read (&lo->kmax));
5545 + NBD_PROC_LABEL (15);
5547 + len += sprintf (buf + len, "[%s] Cthreads:\t%d", devnam,
5548 + atomic_read (&lo->cthreads));
5550 + NBD_PROC_LABEL (16);
5554 + for (j = 0; j < lo->nslot; j++) {
5555 + struct nbd_slot *slotj = &lo->slots[j];
5557 + ((slotj->flags & NBD_SLOT_RUNNING) ? 1 :
5559 + ((slotj->flags & NBD_SLOT_WAITING) ? 2 :
5565 + break; /* PTB not in */
5568 + break; /* PTB in and not waiting */
5571 + break; /* PTB impossible */
5574 + break; /* PTB in and waiting */
5576 + len += sprintf (buf + len, "\t(%s)", desc);
5580 + len += sprintf (buf + len, "\n");
5582 + NBD_PROC_LABEL (17);
5585 + len += sprintf (buf + len, "[%s] Cpids:\t%d", devnam,
5586 + atomic_read (&lo->cthreads));
5590 + for (j = 0; j < lo->nslot; j++) {
5591 + struct nbd_slot *slotj = &lo->slots[j];
5593 + sprintf (buf + len, "\t(%u)", slotj->pid);
5595 + len += sprintf (buf + len, "\n");
5600 + for (j = 0; j < lo->nslot; j++) {
5601 + struct nbd_slot *slotj = &lo->slots[j];
5602 + if (slotj->spid != 0)
5605 + if (j < lo->nslot) {
5607 + sprintf (buf + len, "[%s] Kpids:\t%d",
5609 + atomic_read (&lo->cthreads));
5610 + for (k = 0; k < lo->nslot; k++) {
5611 + struct nbd_slot *slotk =
5614 + sprintf (buf + len, "\t(%u)",
5617 + len += sprintf (buf + len, "\n");
5621 + NBD_PROC_LABEL (18);
5623 + NBD_PROC_LABEL (19);
5625 + // PTB have to tell loop head that we are not reentering
5626 + next_label = NULL;
5627 + next_label_name = NULL;
5630 + NBD_PROC_LABEL (20);
5632 + if (last == i - 2) {
5633 + struct nbd_device * lo = nbd_get (i - 1);
5634 + char *prevnam = lo->devnam;
5636 + sprintf (buf + len, "Device %s:\tClosed\n", prevnam);
5639 + if (last < i - 2) {
5642 + struct nbd_device * llo = nbd_get (last + 1);
5643 + struct nbd_device * plo = nbd_get (i - 1);
5644 + strncpy (lastnam, llo->devnam, 3);
5645 + strncpy (prevnam, plo->devnam, 3);
5646 + len += sprintf (buf + len, "Device %s-%s:\tClosed\n",
5647 + lastnam, prevnam);
5650 + NBD_PROC_LABEL (21);
5652 + // PTB re-init vital statistics for next time
5653 + next_label = NULL;
5654 + next_label_name = NULL;
5664 + * PTB read an int from a string. Return number of ints read (0 or 1).
5667 +sscani (char *buf, int len, int *n)
5671 + short has_digits = 0;
5672 + short is_signed = 0;
5674 + // PTB look for first significant character
5675 + for (i = 0; i < len; i++) {
5677 + if (c == ' ' || c == '\t') {
5680 + } else if (c == '-') {
5684 + } else if (c == '+') {
5688 + } else if (c >= '0' && c <= '9') {
5696 + // PTB i now points at first digit if there is one
5699 + for (; i < len; i++) {
5701 + if (c < '0' || c > '9')
5706 + if (is_signed >= 0) {
5715 + * look for a 1 or 2 letter device code ("a" or "aa") and save the
5716 + * device number to which it refers. Return number of device letter
5717 + * codes found (0 or 1).
5720 +sscana (char *buf, int len, int *n)
5724 + short has_letters = 0;
5726 + for (i = 0; i < len; i++) {
5728 + if (c >= 'a' && c <= 'z') {
5731 + } else if (c == ' ') {
5740 + for (; i < len; i++) {
5742 + if (c < 'a' || c > 'z')
5752 + * read an integer (or 2-letter ascii) arg into an int. Return number
5753 + * of integers read (0 or 1) and -1 for no keymatch. The first arg is a
5755 + * @i is the integer value that results
5756 + * @j is an index if one is supplied (foo[j] = i ), else -1
5759 +getarg (const char *buffer, int buflen, const char *key, int *i, int *j)
5764 + void skip_ws (void) {
5765 + while (buflen > 0) {
5766 + if (*buffer != ' ' && *buffer != '\t')
5775 + keylen = strlen (key);
5776 + if (strncmp (buffer, key, keylen))
5785 + if (*buffer == '[') {
5794 + closing = strchr (buffer, ']');
5797 + indexlen = closing - buffer;
5800 + if (sscani ((char *) buffer, indexlen, j) < 1)
5802 + if (sscana ((char *) buffer, buflen, j) < 1)
5806 + buflen -= indexlen;
5814 + if (*buffer != '=')
5822 + if (sscani ((char *) buffer, buflen, i) < 1)
5824 + if (sscana ((char *) buffer, buflen, i) < 1)
5830 + * PTB - write a 0 with echo -n 0 to /proc/nbdinfo to do a hard reset.
5833 +nbd_write_proc (struct file *file, const char *buffer, unsigned long count,
5842 + if (buffer[1] != '\n')
5844 + /* else fallthru to case 1 */
5846 + switch (*buffer) {
5848 + for (i = 0; i < MAX_NBD; i++) {
5849 + struct nbd_device *lo = nbd_get(i);
5850 + lo->hard_reset (lo);
5854 + for (i = 0; i < MAX_NBD; i++) {
5855 + // PTB this takes the io spinlock and our spinlock.
5856 + struct nbd_device *lo = nbd_get(i);
5857 + lo->soft_reset (lo);
5858 + lo->reenable_delay(lo, 5);
5866 + int merge_requests;
5870 +#ifndef NO_BUFFERED_WRITES
5871 + int buffer_writes;
5877 + if (getarg (buffer, count, "merge_requests",
5878 + &merge_requests, &index) >= 0) {
5880 + set_merge_requests (merge_requests, index);
5883 + if (getarg (buffer, count, "sync_intvl",
5884 + &sync_intvl, &index) >= 0
5885 + || getarg (buffer, count, "sync",
5886 + &sync_intvl, &index) >= 0) {
5888 + set_sync_intvl (sync_intvl, index);
5891 + if (getarg (buffer, count, "show_errs",
5892 + &show_errs, &index) >= 0) {
5894 + set_show_errs (show_errs, index);
5897 + if (getarg (buffer, count, "md5sum",
5898 + &md5sum, &index) >= 0) {
5900 + set_md5sum (md5sum, index);
5903 +#ifndef NO_BUFFERED_WRITES
5904 + if (getarg (buffer, count, "buffer_writes",
5905 + &buffer_writes, &index) >= 0) {
5907 + set_buffer_writes (buffer_writes, index);
5910 +#endif /* NO_BUFFERED_WRITES */
5911 + if (getarg (buffer, count, "enable",
5912 + &enable, &index) >= 0) {
5914 + set_enable (enable, index);
5917 + if (getarg (buffer, count, "direct",
5918 + &direct, &index) >= 0) {
5920 + set_direct(direct, index);
5923 + if (getarg (buffer, count, "reset",
5924 + &reset, &index) >= 0) {
5926 + do_reset(reset, index);
5929 + NBD_ERROR ("illegal %ld character command\n",
5939 +nbd_init_proc(struct proc_dir_entry *res) {
5940 + res->read_proc = nbd_read_proc;
5941 + res->write_proc = nbd_write_proc;
5944 --- linux-2.5.64/drivers/Makefile.pre-enbd Wed Mar 5 04:29:33 2003
5945 +++ linux-2.5.64/drivers/Makefile Mon Mar 24 17:44:04 2003
5947 obj-$(CONFIG_SBUS) += sbus/
5948 obj-$(CONFIG_ZORRO) += zorro/
5949 obj-$(CONFIG_MAC) += macintosh/
5950 +obj-$(CONFIG_ENBD) += block/enbd/
5951 obj-$(CONFIG_PARIDE) += block/paride/
5952 obj-$(CONFIG_TC) += tc/
5953 obj-$(CONFIG_USB) += usb/
5954 --- linux-2.5.64/include/linux/enbd.h.pre-enbd Tue Mar 25 15:20:38 2003
5955 +++ linux-2.5.64/include/linux/enbd.h Tue Mar 25 14:17:37 2003
5957 +#ifndef LINUX_ENBD_H
5958 +#define LINUX_ENBD_H
5960 +/* unsigned comments are Pavel's originals for 2.1.*
5961 + * pavel@atrey.karlin.mff.cuni.cz (Pavel Machek)
5962 + * comments marked PTB are from
5963 + * ptb@it.uc3m.es (Peter T. Breuer)
5964 + * comments marked AMARIN are from
5965 + * amarin@it.uc3m.es (Andres Marin Lopez)
5968 +#include <asm/types.h>
5970 +#ifndef NBD_VERSION
5971 +#define NBD_VERSION "2.4.30 $Date$"
5972 +#endif /*NBD_VERSION*/
5975 + * Third type of request apart from READ or WRITE
5990 + # define SPECIAL 4
5994 + * We need extra bits of req->flags
5996 + # define __REQ_NBD __REQ_NR_BITS
5997 + # define REQ_NBD (1 << __REQ_NBD)
5998 + # define __REQ_NBDSEQNO (__REQ_NR_BITS + 1)
5999 + # define REQ_NBDSEQNO (1 << __REQ_NBDSEQNO)
6000 + // PTB ... and all the other bits are seqno too!
6002 +/* PTB - new style ioctl assignments */
6003 + #define NBD_SET_SOCK _IOW(0xab, 0x00, int)
6004 + #define NBD_TEST_IOCTL1 _IOW(0xab, 0x01, int)
6005 + #define NBD_SET_SIZE _IOW(0xab, 0x02, int)
6006 + #define NBD_DO_IT _IOW(0xab, 0x03, int)
6007 + #define NBD_CLEAR_SOCK _IOW(0xab, 0x04, int)
6008 + #define NBD_CLEAR_QUE _IO (0xab, 0x05)
6009 + #define NBD_PRINT_DEBUG _IO (0xab, 0x06)
6010 + #define NBD_TEST_IOCTL2 _IOR(0xab, 0x07, int)
6011 + #define NBD_HARD_RESET _IO (0xab, 0x09)
6012 + #define NBD_DEC_USE_COUNT _IO (0xab, 0x09)
6013 + #define MY_NBD_ACK _IOW(0xab, 0x0a, char *)
6014 + #define MY_NBD_GET_REQ _IOW(0xab, 0x0b, char *)
6015 + #define MY_NBD_REG_BUF _IOW(0xab, 0x0c, char *)
6016 + #define MY_NBD_CLR_REQ _IOW(0xab, 0x0d, int)
6017 + #define MY_NBD_SYNC _IOW(0xab, 0x0e, int)
6018 + #define NBD_SET_SECTORS _IOW(0xab, 0x0f, int)
6019 + #define MY_NBD_SET_SIG _IOW(0xab, 0x10, int *)
6020 + #define NBD_RESET _IO (0xab, 0x11)
6021 + #define NBD_TEST_IOCTL3 _IOWR(0xab, 0x12, int)
6022 + #define MY_NBD_ERR_REQ _IOW(0xab, 0x13, int)
6023 + #define MY_NBD_SET_INTVL _IOW(0xab, 0x14, int)
6024 + #define MY_NBD_SET_SHOW_ERRS _IOW(0xab, 0x15, int)
6025 + #define NBD_SET_MD5SUM _IOW(0xab, 0x16, int)
6026 + #define MY_NBD_SET_BUFFERWR _IOW(0xab, 0x17, int)
6027 + #define MY_NBD_INVALIDATE _IOW(0xab, 0x18, int)
6028 + #define MY_NBD_SET_SPID _IOW(0xab, 0x19, int)
6029 + #define MY_NBD_SET_RQ_HANDLE _IOW(0xab, 0x1a, void*)
6030 + #define MY_NBD_SET_RQ_SEQNO _IOW(0xab, 0x1b, int)
6031 + #define MY_NBD_SET_RQ_DIGEST _IOWR(0xab, 0x1d, nbd_digest_t)
6032 + #define NBD_TEST_IOCTL4 _IOR(0xab, 0x1e, char[256])
6033 + #define NBD_TEST_IOCTL5 _IOWR(0xab, 0x1f, char[256])
6034 + #define NBD_TEST_IOCTL6 _IO(0xab, 0x20) // special r 256B
6035 + #define NBD_TEST_IOCTL7 _IO(0xab, 0x21) // special rw 256B
6036 + #define NBD_SET_BLKSIZE _IOW(0xab, 0x22, int)
6037 + #define NBD_GET_BLKSIZE _IOR(0xab, 0x23, long)
6038 + #define NBD_SET_PF_MEMALLOC _IOW(0xab, 0x24, int)
6039 + #define MY_NBD_SET_DIRECT _IOW(0xab, 0x25, int)
6040 + #define MY_NBD_GET_NPORT _IOR(0xab, 0x26, int)
6042 +#define MAX_NBD 16 /* PTB MAX was 128, but that's a lot */
6043 +#define NBD_SHIFT 4 /* PTB 16 partitions/sockets/slots per device */
6044 + /* PTB number of socket slots per device */
6045 +#define NBD_MAXCONN (1<<NBD_SHIFT)
6046 +#define NBD_SIGLEN 128 /* PTB length of sig on device */
6047 +#define NBD_MAX_SECTORS 512 /* PTB max number of 512B sectors in a buffer */
6050 +#if defined(MAJOR_NR) || defined(__KERNEL__)
6051 + /* PTB we are included from the kernel nbd.c file so put kernel stuff here */
6053 + #include <linux/config.h>
6055 + #define ENDREQ_NOCURRENT
6056 + #define LOCAL_END_REQUEST
6057 + #include <linux/blk.h>
6060 + /* PTB various defaults */
6061 + #define NBD_RAHEAD_DFLT 24 /* PTB slow medium */
6062 + #define NBD_SYNC_INTVL 0 /* PTB sync every nK reqs (default disable) */
6063 + #define NBD_REQ_TIMEO 5 /* PTB client inactivity chk intvl (rollback) */
6064 + #define NBD_SPEED_LIM 100000 /* PTB limit to 100M write reqs/s */
6065 + #define NBD_MERGE_REQ_DFLT 0 /* PTB until accounting fixed! */
6066 + /* PTB Jens Axboe says that plug should always be set in 2.4.* */
6067 + #define NBD_PLUG_DFLT 1
6068 + #define NBD_MD5SUM_DFLT 0
6071 + * PTB User messaging defs.
6074 + #define NBD_ID "NBD #%d[%d]: %s "
6076 + #define NBD_DEBUG(level, s...) \
6077 + { static int icnt; printk( KERN_DEBUG NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
6078 + #define NBD_ERROR( s...) \
6079 + { static int icnt; printk( KERN_ERR NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
6080 + #define NBD_ALERT( s...) \
6081 + { static int icnt; printk( KERN_ALERT NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
6082 + #define NBD_INFO( s...) \
6083 + { static int icnt; printk( KERN_INFO NBD_ID, __LINE__, icnt++, __FUNCTION__); printk(s);}
6088 + struct file * file; /* PTB add - for refcnt, NULL if slot empty */
6089 + struct socket * sock; /* PTB add */
6090 + int in; /* PTB add - tot blocks entered */
6091 + int out; /* PTB add - tot blocks released */
6092 + int err; /* PTB add - tot blocks errored */
6093 + int req; /* PTB add - tot blocks pending */
6094 + char * buffer; /* PTB add - user space buffer */
6095 + int bufsiz; /* PTB add - user space buffer size */
6096 + struct list_head queue;
6097 + unsigned long req_age; /* PTB add - age of pending req */
6098 + unsigned long cli_age; /* PTB add - age of client */
6099 + struct nbd_device *lo; /* PTB add - parent device */
6100 + #define NBD_SLOT_RUNNING 0x0001
6101 + #define NBD_SLOT_WAITING 0x0002
6102 + #define NBD_SLOT_BUFFERED 0x0004
6103 + #define NBD_SLOT_MD5SUM 0x8000 /* slot reply has a digest in it ..*/
6104 + #define NBD_SLOT_MD5_OK 0x10000 /* .. and equaled req's */
6105 + int flags; /* PTB add */
6106 + int i; /* PTB add - slot number */
6107 + int buflen; /* PTB add - buffer byte count */
6108 + int pid; /* PTB add - client process */
6109 + int refcnt; /* PTB add - so can set_sock/clr_sock ourself */
6110 + int nerrs; /* PTB add - local error count */
6111 + int spid; /* PTB add - server pid */
6117 + struct semaphore access_sem;
6118 + int (*notify_fn)(kdev_t, int);
6119 + int (*notify)(struct nbd_md *,kdev_t);
6120 + int (*unnotify)(struct nbd_md *,kdev_t);
6121 + void (*dec)(struct nbd_md *);
6122 + void (*inc)(struct nbd_md *);
6123 + void (*reg)(struct nbd_md *, int(*)(kdev_t, int));
6126 + struct nbd_speed {
6127 + atomic_t speed; /* PTB add - current speed in KB/s */
6128 + atomic_t speedmax; /* PTB add - max speed */
6129 + atomic_t speedav; /* PTB add - average speed */
6130 + atomic_t distance; /* PTB add - last distance measure */
6131 + atomic_t jiffy; /* PTB add - last jiffies speed set */
6132 + atomic_t frstj; /* PTB add - first jiffies */
6133 + void (*update)(struct nbd_speed*, int);
6136 + struct nbd_md_list {
6137 + struct list_head list;
6141 + struct nbd_seqno; // forward decl
6142 + struct nbd_seqno {
6143 + unsigned int seqno; /* PTB add - sequence number */
6144 + atomic_t seqno_gen; /* PTB add - seqno generation */
6145 + void (*inc)(struct nbd_seqno *);
6146 + int (*get)(struct nbd_seqno *);
6147 + void (*reset)(struct nbd_seqno *);
6148 + unsigned (*calc)(struct nbd_seqno *, unsigned);
6151 + struct nbd_device {
6154 + #define NBD_READ_ONLY 0x0001
6155 + #define NBD_WRITE_NOCHK 0x0002
6156 + #define NBD_INITIALISED 0x0004
6157 + #define NBD_SIGNED 0x0008
6159 + #define NBD_ENABLED 0x0010
6160 + #define NBD_SIZED 0x0020
6161 + #define NBD_BLKSIZED 0x0040
6163 + #define NBD_QBLOCKED 0x0100
6164 + #define NBD_SHOW_ERRS 0x0200
6165 + #define NBD_SYNC 0x0400
6166 + #define NBD_VALIDATED 0x0800 /* read partition table */
6168 + #define NBD_BUFFERWR 0x1000 /* buffer writes to device */
6169 + #define NBD_REMOTE_INVALID \
6170 + 0x2000 /* remote resource vanished */
6171 + #define NBD_DIRECT 0x4000 /* convert opens to O_DIRECT */
6172 + #define NBD_MD5SUM 0x8000
6176 + int harderror; /* Code of hard error */
6177 + int magic; /* FIXME: not if debugging is off */
6178 + struct list_head queue;
6179 + rwlock_t queue_lock; /* PTB add - spinlock */
6180 + int nslot; /* PTB add - total slots */
6181 + atomic_t islot; /* PTB add - current slot */
6182 + int aslot; /* PTB add - total active slots*/
6183 + atomic_t requests_in[2]; /* PTB add - blocks put on queue */
6184 + atomic_t requests_out[2]; /* PTB add - blocks out from queue */
6185 + atomic_t requests_err; /* PTB add - blocks erred on queue */
6186 + atomic_t wrequests_5so; /* PTB add - write blocks md5 skip */
6187 + atomic_t wrequests_5wo; /* PTB add - write blocks md5 wr */
6188 + atomic_t wrequests_5eo; /* PTB add - write blocks md5 refus*/
6189 + atomic_t wrequests_5to; /* PTB add - write blocks md5sum */
6190 + atomic_t wrequests_5co; /* PTB add - write blocks md5 tot */
6191 + atomic_t wrequests_5no; /* PTB add - write blocks not md5 */
6192 + atomic_t requests_req[2]; /* PTB add - read blocks pending */
6193 + atomic_t kwaiters; /* PTB add - kernel thrds waiting */
6194 + atomic_t kthreads; /* PTB add - kernel threads in */
6195 + atomic_t maxq[2]; /* PTB add - max req queue depth */
6196 + atomic_t countq[2]; /* PTB add - request queue depth */
6197 + atomic_t errors; /* PTB add - tot requests errored */
6198 + struct nbd_seqno seqno_out; /* PTB add - seq number */
6199 + atomic_t cwaiters; /* PTB add - client thrds waiting */
6200 + atomic_t cthreads; /* PTB add - client threads in */
6201 + atomic_t req_in[2][1 + NBD_MAX_SECTORS/2];
6202 + wait_queue_head_t wq; /* PTB add */
6203 + struct nbd_slot slots[NBD_MAXCONN]; /* PTB add - client array */
6204 + unsigned blksize; /* PTB add - device blksize in B */
6205 + u64 bytesize; /* PTB add - device size in B */
6206 + u64 sectors; /* PTB add - device size (sectors) */
6207 + unsigned size; /* PTB add - device size in blks */
6208 + unsigned logblksize; /* PTB add - log2 blksize */
6209 + unsigned nbd; /* PTB add - this array index */
6210 + int signature[NBD_SIGLEN/sizeof(int)];
6211 + /* PTB add - server sig */
6212 + struct file * file; /* PTB add - for ref */
6213 + struct inode * inode; /* PTB add - for ref */
6214 + int bufsiz; /* PTB add - userspace buffer size */
6215 + atomic_t kmax; /* PTB add - max kernel threads */
6216 + char *blockmap; /* PTB add - map of block states */
6217 + unsigned long disabled; /* PTB add - when was it disabled */
6218 + int req_timeo; /* PTB add - inactivity timeout */
6219 + struct timer_list run_queue; /* PTB add - run queue */
6220 + struct work_struct task_queue; /* PTB add - task queue */
6221 + char devnam[4]; /* PTB add - drive letters */
6222 + atomic_t maxreqblks; /* PTB add - maximum req size seen */
6223 + int max_sectors; /* PTB add - max req size allowed! */
6224 + int lives; /* PTB add - # times enabled */
6225 + // PTB speed measurement settings
6226 + struct nbd_speed tspeed;
6227 + struct nbd_speed wspeed;
6228 + struct nbd_speed rspeed;
6229 + int dummy; /* PTB add - unused */
6230 + struct request *req; /* PTB fake request for ioctls */
6231 + wait_queue_head_t req_wq; /* PTB req done notifications */
6232 + struct request *rq; /* PTB special request ptr */
6233 + struct list_head altqueue; /* PTB diverted requests */
6234 + rwlock_t altqueue_lock; /* PTB add - diverted reqs lock */
6235 + atomic_t seqno_in; /* PTB add - unacked reqs */
6236 + struct semaphore pid_sem; /* PTB control setting pid */
6237 + struct gendisk *disk; /* PTB for partitions */
6238 + struct request_queue *q; /* PTB make queue internal */
6239 + rwlock_t meta_lock; /* PTB add - spinlock meta data */
6240 + atomic_t merge_requests; /* PTB local req blks limit - 1 */
6241 + unsigned long reenable_time; /* PTB time to delayed reenable */
6242 + void (*enable) (struct nbd_device *lo);
6243 + void (*reset) (struct nbd_device *lo, int i);
6244 + int (*disable) (struct nbd_device *lo);
6245 + int (*read_only) (struct nbd_device *lo);
6246 + void (*set_speed) (struct nbd_device *lo);
6247 + int (*hard_reset)(struct nbd_device *lo);
6248 + int (*soft_reset)(struct nbd_device *lo);
6249 + int (*reenable_delay) (struct nbd_device *lo, int delay);
6252 +#endif /* MAJOR_NR */
6256 +/* Pavel - This now IS in some kind of include file... */
6259 +#define NBD_INIT_MAGIC 0x12345678 /* AMARIN */
6260 +#define NBD_REQUEST_MAGIC 0x25609513
6261 +#define NBD_REPLY_MAGIC 0x67446698
6262 +/* Pavel - Do *not* use magics: 0x12560953 0x96744668.
6265 +#define NBD_DEV_MAGIC 0x68797548
6267 +#define NBD_REQUEST_MAGIC_T __u32
6268 +#define NBD_REQUEST_TYPE_T __u32
6269 +#define NBD_REQUEST_FROM_T __u64
6270 +#define NBD_REQUEST_LEN_T __u32
6271 +#define NBD_REQUEST_FLAGS_T __u32
6272 +#define NBD_REQUEST_TIME_T __u64
6273 +#define NBD_REQUEST_ZONE_T __u64
6274 +#define NBD_REQUEST_SPECIAL_T __u32
6276 +#define NBD_REPLY_MAGIC_T __u32
6277 +#define NBD_REPLY_ERROR_T __s32
6278 +#define NBD_REPLY_FLAGS_T __u32
6279 +#define NBD_REPLY_TIME_T __u64
6280 +#define NBD_REPLY_ZONE_T __u64
6282 +#define NBD_REQUEST_HANDLE_T __u32
6283 +#define NBD_REPLY_HANDLE_T __u32
6285 + typedef __u32 nbd_digest_t[4];
6287 + #define NBD_DIGEST_T nbd_digest_t
6289 +#define NBD_REQUEST_DIGEST_T nbd_digest_t
6290 +#define NBD_REPLY_DIGEST_T nbd_digest_t
6293 +#define NBD_DIGEST_BITS 128
6294 +#define NBD_DIGEST_LENGTH ((NBD_DIGEST_BITS)/8)
6295 +#define NBD_REQUEST_SEQNO_T __u32
6297 +struct nbd_request {
6298 + NBD_REQUEST_MAGIC_T magic;
6299 + NBD_REQUEST_TYPE_T type; /* == READ || == WRITE */
6300 + NBD_REQUEST_HANDLE_T handle;
6301 + NBD_REQUEST_FROM_T from; /* 64 bit PTB 132 */
6302 + NBD_REQUEST_LEN_T len;
6306 +#define NBD_REQUEST_ERRORED 0x0800
6307 +#define NBD_REQUEST_MD5SUM 0x8000 /* has a digest in it ..*/
6308 +#define NBD_REQUEST_MD5_OK 0x10000 /* .. and equaled req's */
6309 +#define NBD_REQUEST_IOCTL 0x40000 /* ioctl in len, arg in from */
6310 +#define NBD_REQUEST_SPECIALRW 0x80000 /* 1 for w 0 for r on special */
6311 + NBD_REQUEST_FLAGS_T flags;
6312 + NBD_REQUEST_TIME_T time;
6313 + NBD_REQUEST_ZONE_T zone;
6314 + NBD_REQUEST_SEQNO_T seqno;
6316 + NBD_REQUEST_DIGEST_T digest;
6318 + NBD_REQUEST_SPECIAL_T special;
6320 + char dummy1[0] __attribute__ ((aligned (64)));
6321 +} __attribute__ ((packed)) ;
6323 + #define NBD_REQUEST_LENGTH sizeof(struct nbd_request)
6326 + NBD_REPLY_MAGIC_T magic;
6327 + NBD_REPLY_ERROR_T error; /* 0 = ok, else error */
6328 + NBD_REPLY_HANDLE_T handle; /* handle you got from request */
6332 +#define NBD_REPLY_ERRORED 0x0800
6333 +#define NBD_REPLY_MD5SUM 0x8000 /* has a digest in it .. */
6334 +#define NBD_REPLY_MD5_OK 0x10000 /* .. and equaled req's */
6335 +#define NBD_REPLY_CLOSE 0x20000 /* close cmd from server */
6336 +#define NBD_REPLY_IOCTL 0x40000 /* ioctl in len, arg in from */
6337 + NBD_REPLY_FLAGS_T flags;
6338 + NBD_REPLY_TIME_T time;
6339 + NBD_REPLY_ZONE_T zone;
6341 + NBD_REPLY_DIGEST_T digest;
6344 + char dummy1[0] __attribute__ ((aligned (64)));
6345 +} __attribute__ ((packed)) ;
6347 + #define NBD_REPLY_LENGTH sizeof(struct nbd_reply)
6349 + #define NBD_BUFFER_DATA_OFFSET \
6350 + ((NBD_REQUEST_LENGTH>NBD_REPLY_LENGTH)?NBD_REQUEST_LENGTH:NBD_REPLY_LENGTH)
6354 + // PTB forward declaration
6355 + static struct nbd_device nbd_dev[];
6358 + static long wait_for_completion_timeout(struct completion *x, long timeout)
6360 + spin_lock_irq(&x->wait.lock);
6361 + if (!x->done && timeout > 0) {
6362 + DECLARE_WAITQUEUE(wait, current);
6364 + wait.flags |= WQ_FLAG_EXCLUSIVE;
6365 + __add_wait_queue_tail(&x->wait, &wait);
6367 + __set_current_state(TASK_UNINTERRUPTIBLE);
6368 + spin_unlock_irq(&x->wait.lock);
6369 + timeout = schedule_timeout(timeout);
6370 + spin_lock_irq(&x->wait.lock);
6371 + } while (!x->done && timeout > 0);
6372 + __remove_wait_queue(&x->wait, &wait);
6379 + spin_unlock_irq(&x->wait.lock);
6383 + static void end_request(struct request *req, int uptodate) {
6386 + struct nbd_device *lo = req->rq_disk->private_data;
6387 + static int rq_type(struct request *);
6389 + if (rq_type(req) == IOCTL) {
6390 + // PTB this is the device's ioctl request
6391 + complete(req->waiting);
6392 + // PTB let the driver code return the req, etc.
6396 + /* unlock chained buffers */
6397 + while ((bio = req->bio) != NULL) {
6398 + unsigned nsect = bio_sectors(bio);
6399 + blk_finished_io(nsect);
6400 + req->bio = bio->bi_next;
6401 + bio->bi_next = NULL;
6402 + bio_endio(bio, nsect << 9, uptodate ? 0 : -EIO);
6405 + if (req->flags & REQ_SPECIAL)
6406 + // don't account specials
6409 + write_lock(&lo->altqueue_lock);
6410 + if (atomic_read(&lo->countq[READ])
6411 + + atomic_read(&lo->countq[WRITE]) == 0) {
6412 + if (atomic_read(&lo->flags) & NBD_QBLOCKED) {
6413 + static int nbd_requeue(struct nbd_device *);
6415 + atomic_clear_mask(NBD_QBLOCKED, &lo->flags);
6418 + write_unlock(&lo->altqueue_lock);
6422 + * PTB This takes the spinlock itself! So call it with the io spinlock
6425 + static void end_request_lock(struct request *req, int uptodate) {
6427 + unsigned long flags;
6428 + request_queue_t *q = req->q;
6430 + spin_lock_irqsave(q->queue_lock, flags);
6431 + end_request(req, uptodate);
6432 + spin_unlock_irqrestore(q->queue_lock, flags);
6436 + * PTB Call this only with the io spinlock held.
6438 + static inline void nbd_end_request(struct request *req) {
6440 + // PTB the kernel has only 2 queues, read and write, and it uses
6441 + // the cmd field to determine which queue the req belongs to. We add a
6442 + // seqno to it in nbd_do_req, so we reestablish it here.
6443 + static void rq_set_seqno(struct request *, int);
6445 + rq_set_seqno(req, 0); // PTB Zero extra seqno info
6446 + end_request( req, (req->errors == 0) ? 1 : 0 );
6450 + * PTB This takes the spinlock itself! So call it with the io spinlock
6453 + static void nbd_end_request_lock(struct request *req) {
6455 + // PTB the kernel has only 2 queues, read and write, and it uses
6456 + // the cmd field to determine which queue the req belongs to. We add a
6457 + // seqno to it in nbd_do_req, so we reestablish it here.
6458 + static void rq_set_seqno(struct request *, int);
6460 + rq_set_seqno(req, 0); // PTB Zero extra seqno info
6461 + end_request_lock( req, !req->errors );
6464 + extern int nbd_init_seqno(struct nbd_seqno *);
6465 + extern int nbd_init_speed(struct nbd_speed *);
6466 + extern int nbd_init_md(struct nbd_md *);
6467 + extern void nbd_init_proc(struct proc_dir_entry *res);
6469 + #endif /* MAJOR_NR */
6471 +#endif /* LINUX_ENBD_H */
6474 --- linux-2.5.64/include/linux/enbd_ioctl.h.pre-enbd Tue Mar 25 15:20:45 2003
6475 +++ linux-2.5.64/include/linux/enbd_ioctl.h Mon Mar 24 22:52:26 2003
6477 +#ifndef NBD_IOCTL_H
6478 +#define NBD_IOCTL_H 1
6480 +int nbd_ioctl_convert(int ioctl);
6481 +int nbd_ioctl_convert_inplace(int *ioctl);
6482 +int nbd_ioctl_revert(int ioctl);
6483 +int nbd_ioctl_size (int cmd, char *arg);
6484 +int nbd_ioctl_size_user (int cmd, char *arg);
6486 +int nbd_ioctl_copy_to_user (int cmd, char *arg, char *buf, int size);
6487 +int nbd_ioctl_copy_from_user (int cmd, char *buf, char *arg, int size);
6490 + * PTB object containing all the above methods, to be registered with
6491 + * the enbd.o module
6494 +#define NBD_REMOTE_IOCTL_ENABLED 0x01
6495 + unsigned long flags;
6496 + int (*convert) (int ioctl);
6497 + int (*convert_inplace)(int *ioctl);
6498 + int (*revert) (int ioctl);
6499 + int (*size) (int cmd, char *arg);
6500 + int (*size_user) (int cmd, char *arg);
6501 + int (*cp_to_user) (int cmd, char *arg, char *buf, int size);
6502 + int (*cp_from_user) (int cmd, char *buf, char *arg, int size);
6505 +struct nbd_ioctl_stub {
6506 + struct nbd_ioctl * remote;
6507 + int (*reg) (struct nbd_ioctl_stub *,struct nbd_ioctl *);
6508 + int (*unreg) (struct nbd_ioctl_stub *,struct nbd_ioctl *);
6511 +extern struct nbd_ioctl_stub nbd_remote_ioctl;
6512 +extern int nbd_init_ioctl_stub(struct nbd_ioctl_stub *);
6515 +// PTB conversion table entries
6516 +struct ioctl_conv {
6517 + unsigned int old; // ioctl id, _IO or _IOR or _IOW or _IOWR
6518 + unsigned int new; // ioctl id
6521 +// PTB extended conversion table entries
6522 +struct ioctl_special {
6524 + int (*size) (char *arg);
6525 + int (*size_user) (char *arg);
6526 + int (*ioctl_copy_from_user)(char *buf, char*arg, int size);
6527 + int (*ioctl_copy_to_user)(char *arg, char*buf, int size);
6530 +extern int nbd_init_ioctl_stub(struct nbd_ioctl_stub *);
6532 +#endif /* NBD_IOCTL_H */