]> git.pld-linux.org Git - packages/kernel.git/blob - linux-cluster-gnbd.patch
- obsolete
[packages/kernel.git] / linux-cluster-gnbd.patch
1 diff -urN linux-2.6.9/drivers/block/Kconfig linux-2.6.9-patched/drivers/block/Kconfig
2 --- linux-2.6.9/drivers/block/Kconfig   2004-10-18 16:53:43.000000000 -0500
3 +++ linux-2.6.9-patched/drivers/block/Kconfig   2004-10-22 13:27:52.938836304 -0500
4 @@ -356,6 +356,13 @@
5           your machine, or if you want to have a raid or loopback device
6           bigger than 2TB.  Otherwise say N.
7  
8 +config BLK_DEV_GNBD
9 +       tristate "Global network block device support"
10 +       depends on NET
11 +       ---help---
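12 +         This driver provides block devices whose read and write requests are sent over a network socket to a GNBD server.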
13 +         If unsure, say N.
14 +
15  source "drivers/s390/block/Kconfig"
16  
17  endmenu
18 diff -urN linux-2.6.9/drivers/block/Makefile linux-2.6.9-patched/drivers/block/Makefile
19 --- linux-2.6.9/drivers/block/Makefile  2004-10-18 16:54:55.000000000 -0500
20 +++ linux-2.6.9-patched/drivers/block/Makefile  2004-10-22 13:30:48.224188880 -0500
21 @@ -43,4 +43,4 @@
22  obj-$(CONFIG_VIODASD)          += viodasd.o
23  obj-$(CONFIG_BLK_DEV_SX8)      += sx8.o
24  obj-$(CONFIG_BLK_DEV_UB)       += ub.o
25 -
26 +obj-$(CONFIG_BLK_DEV_GNBD)     += gnbd.o
27 diff -urN linux-2.6.9/drivers/block/gnbd.c linux-2.6.9-patched/drivers/block/gnbd.c
28 --- linux-2.6.9/drivers/block/gnbd.c    1969-12-31 18:00:00.000000000 -0600
29 +++ linux-2.6.9-patched/drivers/block/gnbd.c    2004-10-22 13:43:33.303879088 -0500
30 @@ -0,0 +1,1053 @@
31 +/******************************************************************************
32 +*******************************************************************************
33 +**
34 +**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
35 +**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
36 +**
37 +**  This copyrighted material is made available to anyone wishing to use,
38 +**  modify, copy, or redistribute it subject to the terms and conditions
39 +**  of the GNU General Public License v.2.
40 +**
41 +*******************************************************************************
42 +******************************************************************************/
43 +
44 +/* Large chunks of this code were lifted from nbd.c */
45 +
46 +#include <linux/major.h>
47 +
48 +#include <linux/blkdev.h>
49 +#include <linux/module.h>
50 +#include <linux/init.h>
51 +#include <linux/sched.h>
52 +#include <linux/fs.h>
53 +#include <linux/bio.h>
54 +#include <linux/stat.h>
55 +#include <linux/errno.h>
56 +#include <linux/file.h>
57 +#include <linux/ioctl.h>
58 +#include <net/sock.h>
59 +#include <linux/in.h>
60 +#include <linux/buffer_head.h>
61 +#include <linux/miscdevice.h>
62 +
63 +#include <linux/devfs_fs_kernel.h>
64 +
65 +#include <asm/uaccess.h>
66 +#include <asm/types.h>
67 +
68 +#include <linux/gnbd.h>
69 +
70 +static int major_nr = 0;   /* presumably the block major number; not referenced in this part of the file -- TODO confirm */
71 +uint64_t insmod_time;      /* copied to user space by the GNBD_GET_TIME ioctl */
72 +
73 +/* Sanity cookie: BUG_ON() checks compare dev->magic against this value. */
74 +#define GNBD_MAGIC 0x74d06100
75 +/* dprintk(flags, fmt...): printk at KERN_DEBUG when debugflags has a bit of `flags` set; empty under NDEBUG. */
76 +#ifdef NDEBUG
77 +#define dprintk(flags, fmt...)
78 +#else /* NDEBUG */
79 +#define dprintk(flags, fmt...) do { \
80 +       if (debugflags & (flags)) printk(KERN_DEBUG fmt); \
81 +} while (0)
82 +#define DBG_IOCTL       0x0004
83 +#define DBG_INIT        0x0010
84 +#define DBG_EXIT        0x0020
85 +#define DBG_BLKDEV      0x0100
86 +#define DBG_RX          0x0200
87 +#define DBG_TX          0x0400
88 +static unsigned int debugflags;
89 +#endif /* NDEBUG */
90 +/* Device table; the control ioctl indexes it with the minor number it is given. */
91 +static struct gnbd_device gnbd_dev[MAX_GNBD];
92 +/* Sentinel requests: shutdown_req is sent by GNBD_DISCONNECT, ping_req by GNBD_PING; the receive loop never passes either to gnbd_end_request(). */
93 +struct request shutdown_req;
94 +struct request ping_req;
95 +
96 +static spinlock_t gnbd_lock = SPIN_LOCK_UNLOCKED;
97 +/* Map an embedded class_device back to the gnbd_device that contains it. */
98 +#define to_gnbd_dev(d) container_of(d, struct gnbd_device, class_dev)
99 +/* Release callback of gnbd_class; at present it only logs a message and frees nothing (see FIXME). */
100 +static void gnbd_class_release(struct class_device *class_dev)
101 +{
102 +       printk("releasing gnbd class\n");
103 +       /* FIXME -- What the hell do I have to free up here */
104 +}
105 +/* Device class named "gnbd", with gnbd_class_release() as its release callback. */
106 +static struct class gnbd_class = {
107 +       .name = "gnbd",
108 +       .release = gnbd_class_release
109 +};
110 +
111 +/* sysfs show for "pid": prints dev->receiver_pid in decimal, newline-terminated. */
112 +static ssize_t show_pid(struct class_device *class_dev, char *buf)
113 +{
114 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
115 +       return sprintf(buf, "%d\n", dev->receiver_pid);
116 +}
117 +
118 +static CLASS_DEVICE_ATTR(pid, S_IRUGO, show_pid, NULL);
119 +/* sysfs show for "server": prints "<server_name>/<server_port in hex>", or just a newline when no name is set. */
120 +static ssize_t show_server(struct class_device *class_dev, char *buf)
121 +{
122 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
123 +       if (dev->server_name)
124 +               return sprintf(buf, "%s/%hx\n", dev->server_name,
125 +                               dev->server_port);
126 +       else
127 +               return sprintf(buf, "\n");
128 +}
129 +/* sysfs store for "server": expects "<name>/<port in hex, at most 4 digits>"; returns count, -EBUSY, -ENOMEM or -EINVAL. */
130 +/* FIXME -- should a empty store free the memory */
131 +static ssize_t store_server(struct class_device *class_dev,
132 +               const char *buf, size_t count)
133 +{
134 +       ssize_t ret = -ENOMEM;
135 +       short unsigned int port;
136 +       char *name, *ptr;
137 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
138 +       if (down_trylock(&dev->do_it_lock))
139 +               return -EBUSY;
140 +       name = kmalloc(count + 1, GFP_KERNEL);
141 +       if (!name)
142 +               goto out;
143 +       ret = -EINVAL;
144 +       memcpy(name, buf, count);
145 +       name[count] = 0;
146 +       ptr = strchr(name, '/');
147 +       if (!ptr || sscanf(ptr + 1, "%4hx", &port) != 1) {
148 +               kfree(name);    /* malformed input: the previous server setting is kept */
149 +               goto out;
150 +       }
151 +       *ptr = 0;               /* cut the string at '/', leaving only the name */
152 +       kfree(dev->server_name);        /* kfree(NULL) is a no-op */
153 +       dev->server_name = name;
154 +       dev->server_port = port;
155 +       ret = count;
156 +out:
157 +       up(&dev->do_it_lock);   /* every path past down_trylock() releases the lock */
158 +       return ret;
159 +}
160 +
161 +CLASS_DEVICE_ATTR(server, S_IRUGO | S_IWUSR, show_server, store_server);
162 +/* sysfs show for "name": prints dev->name followed by a newline. */
163 +static ssize_t show_name(struct class_device *class_dev, char *buf)
164 +{
165 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
166 +       return sprintf(buf, "%s\n", dev->name);
167 +}
168 +/* sysfs store for "name": reads one whitespace-delimited token (at most 31 chars) into dev->name; -EBUSY if do_it_lock is held. */
169 +static ssize_t store_name(struct class_device *class_dev,
170 +                const char *buf, size_t count)
171 +{
172 +       int res;
173 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
174 +       if (down_trylock(&dev->do_it_lock))
175 +               return -EBUSY;
176 +       res = sscanf(buf, "%31s", dev->name);
177 +       up(&dev->do_it_lock);
178 +       if (res != 1)
179 +               return -EINVAL;
180 +       return count;
181 +}
182 +
183 +CLASS_DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_name, store_name);
184 +
185 +/* sysfs show for "sectors": prints get_capacity(dev->disk) as an unsigned decimal number. */
186 +static ssize_t show_sectors(struct class_device *class_dev, char *buf)
187 +{
188 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
189 +       return sprintf(buf, "%Lu\n",
190 +                       (unsigned long long)get_capacity(dev->disk));
191 +}
192 +/* sysfs store for "sectors": sets the disk capacity (in sectors) and the bdev inode size (sectors << 9 bytes). */
193 +static ssize_t store_sectors(struct class_device *class_dev,
194 +               const char *buf, size_t count)
195 +{
196 +       int res;
197 +       unsigned long long size;        /* exactly what %Lu stores; sector_t may be narrower */
198 +       struct block_device *bdev;
199 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
200 +       
201 +       if (down_trylock(&dev->do_it_lock))
202 +               return -EBUSY;
203 +       res = sscanf(buf, "%Lu\n", &size);
204 +       if (res != 1 || size != (sector_t)size){        /* also reject values sector_t cannot hold */
205 +               up(&dev->do_it_lock);
206 +               return -EINVAL;
207 +       }
208 +       /* FIXME -- should I switch the order here, so that I don't have
209 +          capacity set to one thing and the bdev inode size set to another */ 
210 +       set_capacity(dev->disk, (sector_t)size);
211 +       bdev = bdget_disk(dev->disk, 0);
212 +       if (bdev) {
213 +               down(&bdev->bd_inode->i_sem);
214 +               i_size_write(bdev->bd_inode, (loff_t)size << 9);
215 +               up(&bdev->bd_inode->i_sem);
216 +               bdput(bdev);
217 +       }
218 +       up(&dev->do_it_lock);
219 +       return count;
220 +}
221 +
222 +CLASS_DEVICE_ATTR(sectors, S_IRUGO | S_IWUSR, show_sectors, store_sectors);
223 +/* sysfs show for "usage": prints dev->open_count in decimal. */
224 +static ssize_t show_usage(struct class_device *class_dev, char *buf)
225 +{
226 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
227 +       return sprintf(buf, "%d\n", dev->open_count);
228 +}
229 +
230 +CLASS_DEVICE_ATTR(usage, S_IRUGO, show_usage, NULL);
231 +/* sysfs show for "flags": prints dev->flags as "0x" followed by at least four hex digits. */
232 +static ssize_t show_flags(struct class_device *class_dev, char *buf)
233 +{
234 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
235 +       return sprintf(buf, "0x%04x\n", dev->flags);
236 +}
237 +/* sysfs store for "flags": parses "0x<hex>" straight into dev->flags; -EBUSY if do_it_lock is held, -EINVAL if unparsable. */
238 +static ssize_t store_flags(struct class_device *class_dev,
239 +                const char *buf, size_t count)
240 +{
241 +       int res;
242 +       
243 +        struct gnbd_device *dev = to_gnbd_dev(class_dev);
244 +        if (down_trylock(&dev->do_it_lock))
245 +                return -EBUSY;
246 +       res = sscanf(buf, "0x%hx", &dev->flags); /* NOTE(review): %hx assumes dev->flags is a 16-bit type -- confirm in linux/gnbd.h */
247 +       up(&dev->do_it_lock);
248 +        if (res != 1)
249 +                return -EINVAL;
250 +        return count;
251 +}
252 +
253 +
254 +CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
255 +/* sysfs show for "waittime": -1 when dev->queue_head is empty, otherwise whole seconds elapsed since dev->last_received. */
256 +static ssize_t show_waittime(struct class_device *class_dev, char *buf)
257 +{
258 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
259 +       if (list_empty(&dev->queue_head))       /* read without taking dev->queue_lock */
260 +               return sprintf(buf, "-1\n");
261 +       return sprintf(buf, "%ld\n",
262 +                       ((long)jiffies - (long)dev->last_received) / HZ);
263 +}
264 +
265 +CLASS_DEVICE_ATTR(waittime, S_IRUGO, show_waittime, NULL);
266 +/* sysfs show for "connected": prints 1 when dev->sock is non-NULL, otherwise 0. */
267 +static ssize_t show_connected(struct class_device *class_dev, char *buf)
268 +{
269 +       struct gnbd_device *dev = to_gnbd_dev(class_dev);
270 +       return sprintf(buf, "%d\n", (dev->sock != NULL));
271 +}
272 +
273 +CLASS_DEVICE_ATTR(connected, S_IRUGO, show_connected, NULL);
274 +/* Debug-only: name of a control ioctl for dprintk(); GNBD_PING and GNBD_GET_TIME have no entry and yield "unknown". */
275 +#ifndef NDEBUG
276 +static const char *ioctl_cmd_to_ascii(int cmd)
277 +{
278 +       switch (cmd) {
279 +       case GNBD_DO_IT: return "do-it";
280 +       case GNBD_CLEAR_QUE: return "clear-que";
281 +       case GNBD_PRINT_DEBUG: return "print-debug";
282 +       case GNBD_DISCONNECT: return "disconnect";
283 +       }
284 +       return "unknown";
285 +}
286 +/* Debug-only: name of a GNBD_CMD_* wire command for dprintk(); anything else yields "invalid". */
287 +static const char *gnbdcmd_to_ascii(int cmd)
288 +{
289 +       switch (cmd) {
290 +       case  GNBD_CMD_READ: return "read";
291 +       case GNBD_CMD_WRITE: return "write";
292 +       case  GNBD_CMD_DISC: return "disconnect";
293 +       case GNBD_CMD_PING: return "ping";
294 +       }
295 +       return "invalid";
296 +}
297 +#endif /* NDEBUG */
298 +/* Complete all of req's sectors toward the block layer; success iff req->errors == 0. Takes the request queue's lock itself. */
299 +static void gnbd_end_request(struct request *req)
300 +{
301 +       int uptodate = (req->errors == 0) ? 1 : 0;
302 +       request_queue_t *q = req->q;
303 +       unsigned long flags;
304 +
305 +       dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
306 +                       req, uptodate? "done": "failed");
307 +
308 +       if (!uptodate)
309 +               printk("%s %d called gnbd_end_request with and error\n",
310 +                      current->comm, current->pid);    
311 +       
312 +       spin_lock_irqsave(q->queue_lock, flags);        /* callers must not already hold q->queue_lock */
313 +       if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
314 +               end_that_request_last(req);
315 +       }
316 +       spin_unlock_irqrestore(q->queue_lock, flags);
317 +}
318 +
319 +/*
320 + *  Send (send != 0) or receive `size` bytes of `buf`, looping over partial transfers. Returns the last transfer's byte count, -EINTR on a signal, -EPIPE on a zero-length transfer, or the socket error.
321 + */
322 +static int sock_xmit(struct socket *sock, int send, void *buf, int size,
323 +               int msg_flags)
324 +{
325 +       mm_segment_t oldfs;
326 +       int result;
327 +       struct msghdr msg;
328 +       struct iovec iov;
329 +       unsigned long flags;
330 +       sigset_t oldset;
331 +
332 +       oldfs = get_fs();
333 +       set_fs(get_ds());
334 +       /* Leave only SIGKILL, SIGTERM and SIGHUP unblocked;
335 +        * don't allow other signals to interrupt the transmission */
336 +       spin_lock_irqsave(&current->sighand->siglock, flags);
337 +       oldset = current->blocked;
338 +       sigfillset(&current->blocked);
339 +       sigdelsetmask(&current->blocked, sigmask(SIGKILL) | sigmask(SIGTERM) |
340 +                     sigmask(SIGHUP));
341 +       recalc_sigpending();
342 +       spin_unlock_irqrestore(&current->sighand->siglock, flags);
343 +
344 +       do {
345 +               sock->sk->sk_allocation = GFP_NOIO;
346 +               iov.iov_base = buf;
347 +               iov.iov_len = size;
348 +               msg.msg_name = NULL;
349 +               msg.msg_namelen = 0;
350 +               msg.msg_iov = &iov;
351 +               msg.msg_iovlen = 1;
352 +               msg.msg_control = NULL;
353 +               msg.msg_controllen = 0;
354 +               msg.msg_namelen = 0;
355 +               msg.msg_flags = msg_flags | MSG_NOSIGNAL;
356 +
357 +               if (send)
358 +                       result = sock_sendmsg(sock, &msg, size);
359 +               else
360 +                       result = sock_recvmsg(sock, &msg, size, 0);     /* NOTE(review): msg_flags only reaches msg.msg_flags; the flags argument here is 0 */
361 +
362 +               if (signal_pending(current)) {
363 +                       siginfo_t info;
364 +                       spin_lock_irqsave(&current->sighand->siglock, flags);
365 +                       printk(KERN_WARNING "gnbd (pid %d: %s) got signal %d\n",
366 +                               current->pid, current->comm, 
367 +                               dequeue_signal(current, &current->blocked, &info));
368 +                       spin_unlock_irqrestore(&current->sighand->siglock, flags);
369 +                       result = -EINTR;
370 +                       break;
371 +               }
372 +
373 +               if (result <= 0) {
374 +                       if (result == 0)
375 +                               result = -EPIPE; /* short read */
376 +                       break;
377 +               }
378 +               size -= result; /* advance past what was transferred and go round again */
379 +               buf += result;
380 +       } while (size > 0);
381 +
382 +       spin_lock_irqsave(&current->sighand->siglock, flags);
383 +       current->blocked = oldset;      /* restore the caller's signal mask */
384 +       recalc_sigpending();
385 +       spin_unlock_irqrestore(&current->sighand->siglock, flags);
386 +
387 +       set_fs(oldfs);
388 +       return result;
389 +}
390 +/* Send one bio segment: kmap its page, transmit bv_len bytes starting at bv_offset, kunmap; returns sock_xmit()'s result. */
391 +static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec,
392 +               int flags)
393 +{
394 +       int result;
395 +       void *kaddr = kmap(bvec->bv_page);
396 +       result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len,
397 +                       flags);
398 +       kunmap(bvec->bv_page);
399 +       return result;
400 +}
401 +
402 +/* Convenience form of __gnbd_send_req() that sends on the device's current socket. */
403 +#define gnbd_send_req(dev, req) __gnbd_send_req((dev), (dev)->sock, (req))
404 +/* Send req's header over sock and, for writes, every bio segment; serialised by dev->tx_lock. Returns 0 or a negative error (-ENOTCONN if sock is NULL). */
405 +int __gnbd_send_req(struct gnbd_device *dev, struct socket *sock,
406 +               struct request *req)
407 +{
408 +       int result, i, flags;
409 +       struct gnbd_request request;
410 +       unsigned long size = req->nr_sectors << 9;
411 +
412 +       request.magic = htonl(GNBD_REQUEST_MAGIC);
413 +       request.type = htonl(gnbd_cmd(req));
414 +       request.from = cpu_to_be64((u64) req->sector << 9);
415 +       request.len = htonl(size);
416 +       memcpy(request.handle, &req, sizeof(req));      /* the handle is the request pointer itself; gnbd_do_it() copies it back out of the reply */
417 +
418 +       down(&dev->tx_lock);
419 +
420 +       if (!sock) {
421 +               printk(KERN_ERR "%s: Attempted send on closed socket\n",
422 +                               dev->disk->disk_name);
423 +               result = -ENOTCONN;
424 +               goto error_out;
425 +       }
426 +
427 +       dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
428 +                       dev->disk->disk_name, req,
429 +                       gnbdcmd_to_ascii(gnbd_cmd(req)),
430 +                       (unsigned long long)req->sector << 9,
431 +                       req->nr_sectors << 9);
432 +       result = sock_xmit(sock, 1, &request, sizeof(request),
433 +                       (gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0);
434 +       if (result < 0) {
435 +               printk(KERN_ERR "%s: Send control failed (result %d)\n",
436 +                               dev->disk->disk_name, result);
437 +               goto error_out;
438 +       }
439 +
440 +       if (gnbd_cmd(req) == GNBD_CMD_WRITE) {
441 +               struct bio *bio;
442 +               /*
443 +                * we are really probing at internals to determine
444 +                * whether to set MSG_MORE or not...
445 +                */
446 +               rq_for_each_bio(bio, req) {
447 +                       struct bio_vec *bvec;
448 +                       bio_for_each_segment(bvec, bio, i) {
449 +                               flags = 0;
450 +                               if ((i < (bio->bi_vcnt - 1)) || bio->bi_next)   /* MSG_MORE on all but the final segment */
451 +                                       flags = MSG_MORE;
452 +                               dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
453 +                                               dev->disk->disk_name, req,
454 +                                               bvec->bv_len);
455 +                               result = sock_send_bvec(sock, bvec, flags);
456 +                               if (result < 0) {
457 +                                       printk(KERN_ERR "%s: Send data failed (result %d)\n",
458 +                                                       dev->disk->disk_name,
459 +                                                       result);
460 +                                       goto error_out;
461 +                               }
462 +                       }
463 +               }
464 +       }
465 +       up(&dev->tx_lock);
466 +       return 0;
467 +
468 +error_out:
469 +       up(&dev->tx_lock);
470 +       return result;
471 +}
472 +
473 +/* Return 1 if xreq is currently on dev->queue_head, else 0. Takes dev->queue_lock, so callers must not hold it. */
474 +static int gnbd_find_request(struct gnbd_device *dev, struct request *xreq)
475 +{
476 +       int found = 0;
477 +       struct request *req;
478 +       spin_lock(&dev->queue_lock);    /* entries are added and removed under this lock */
479 +       list_for_each_entry(req, &dev->queue_head, queuelist)
480 +               if (req == xreq) {
481 +                       found = 1;
482 +                       break;
483 +               }
484 +       spin_unlock(&dev->queue_lock);
485 +       return found;
486 +}
487 +/* Receive one bio segment: kmap its page, read bv_len bytes into it at bv_offset, kunmap; returns sock_xmit()'s result. */
488 +static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec)
489 +{
490 +       int result;
491 +       void *kaddr = kmap(bvec->bv_page);
492 +       result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len,
493 +                       MSG_WAITALL);
494 +       kunmap(bvec->bv_page);
495 +       return result;
496 +}
497 +/* Read the data of a reply from dev->sock into every bio segment of req; returns 0, or the first negative receive error. */
498 +int gnbd_recv_req(struct gnbd_device *dev, struct request *req)
499 +{
500 +       int result;
501 +       int i;
502 +       struct bio *bio;
503 +       rq_for_each_bio(bio, req) {
504 +               struct bio_vec *bvec;
505 +               bio_for_each_segment(bvec, bio, i) {
506 +                       result = sock_recv_bvec(dev->sock, bvec);
507 +                       if (result < 0) {
508 +                               printk(KERN_ERR "%s: Receive data failed (result %d)\n",
509 +                                               dev->disk->disk_name,
510 +                                               result);
511 +                               return result;
512 +                       }
513 +                       dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
514 +                                       dev->disk->disk_name, req, bvec->bv_len);
515 +               }
516 +       }
517 +       return 0;
518 +}
519 +/* Receive loop: read replies from dev->sock and complete the matching queued requests. Returns 0 on a shutdown_req reply, otherwise the error that ended the loop. */
520 +int gnbd_do_it(struct gnbd_device *dev)
521 +{
522 +       int result;
523 +       struct gnbd_reply reply;
524 +       struct request *req;
525 +       struct socket *sock = dev->sock;
526 +
527 +       BUG_ON(dev->magic != GNBD_MAGIC);
528 +
529 +       while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL)) > 0){
530 +               if (ntohl(reply.magic) == GNBD_KEEP_ALIVE_MAGIC)
531 +                       /* FIXME -- I should reset the wait time here */
532 +                       continue;
533 +
534 +               memcpy(&req, reply.handle, sizeof(req));        /* the handle carries back the request pointer that was sent */
535 +               if (req == &shutdown_req)
536 +                       return 0;
537 +
538 +               if (!gnbd_find_request(dev, req)){      /* req is only dereferenced once it is known to be on our queue */
539 +                       printk(KERN_ERR "%s: Unexpected reply (%p)\n",
540 +                                       dev->disk->disk_name, reply.handle);
541 +                       return -EBADR;
542 +               }
543 +               if (ntohl(reply.magic) != GNBD_REPLY_MAGIC) {
544 +                       printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
545 +                                       dev->disk->disk_name,
546 +                                       (unsigned long)ntohl(reply.magic));
547 +                       return -EPROTO;
548 +               }
549 +               if (ntohl(reply.error)) {
550 +                       printk(KERN_ERR "%s: Other side returned error (%d)\n",
551 +                                       dev->disk->disk_name, ntohl(reply.error));
552 +                       req->errors++;  /* the request is still dequeued and ended below, as failed */
553 +                       goto remove_req;
554 +               }
555 +               dprintk(DBG_RX, "%s: request %p: got reply\n",
556 +                               dev->disk->disk_name, req);
557 +
558 +               if (gnbd_cmd(req) == GNBD_CMD_READ){
559 +                       result = gnbd_recv_req(dev, req);
560 +                       if (result < 0)
561 +                               return result;
562 +               }
563 +remove_req:
564 +               spin_lock(&dev->queue_lock);
565 +               list_del_init(&req->queuelist);
566 +               dev->last_received = jiffies;
567 +               spin_unlock(&dev->queue_lock);
568 +               if (req != &ping_req)   /* ping_req is a driver-internal sentinel, not block-layer I/O */
569 +                       gnbd_end_request(req);
570 +       }
571 +       printk(KERN_ERR "%s: Receive control failed (result %d)\n",
572 +                       dev->disk->disk_name, result);
573 +       return result;
574 +}
575 +/* Drain dev->queue_head: every queued request except ping_req is ended with an error. */
576 +void gnbd_clear_que(struct gnbd_device *dev)
577 +{
578 +       struct request *req;
579 +
580 +       BUG_ON(dev->magic != GNBD_MAGIC);
581 +
582 +       do {
583 +               req = NULL;
584 +               if (!list_empty(&dev->queue_head)) {    /* NOTE(review): the list is modified here without dev->queue_lock */
585 +                       req = list_entry(dev->queue_head.next, struct request, queuelist);
586 +                       list_del_init(&req->queuelist);
587 +               }
588 +               if (req && req != &ping_req) {
589 +                       req->errors++;
590 +                       gnbd_end_request(req);
591 +               }
592 +       } while (req);
593 +}
594 +
595 +/*
596 + * We always wait for result of write, for now. It would be nice to make it optional
597 + * in future
598 + * if ((req->cmd == WRITE) && (dev->flags & GNBD_WRITE_NOCHK)) 
599 + *   { printk( "Warning: Ignoring result!\n"); gnbd_end_request( req ); }
600 + */
601 +/* Block-layer request function: dequeue each request, add it to dev->queue_head and send it; unsendable requests are ended with an error. */
602 +static void do_gnbd_request(request_queue_t * q)
603 +{
604 +       int err;
605 +       struct request *req;
606 +       
607 +       while ((req = elv_next_request(q)) != NULL) {
608 +               struct gnbd_device *dev;
609 +
610 +               blkdev_dequeue_request(req);
611 +               dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
612 +                               req->rq_disk->disk_name, req, req->flags);
613 +
614 +               if (!(req->flags & REQ_CMD))
615 +                       goto error_out;
616 +               
617 +               dev = req->rq_disk->private_data;
618 +
619 +               if (dev->receiver_pid == -1)    /* -1 is set by the GNBD_DISCONNECT and GNBD_CLEAR_QUE ioctls */
620 +                       goto error_out;
621 +               
622 +               BUG_ON(dev->magic != GNBD_MAGIC);
623 +
624 +               gnbd_cmd(req) = GNBD_CMD_READ;
625 +               if (rq_data_dir(req) == WRITE) {
626 +                       gnbd_cmd(req) = GNBD_CMD_WRITE;
627 +                       if (dev->flags & GNBD_READ_ONLY) {
628 +                               printk(KERN_ERR "%s: Write on read-only\n",
629 +                                               dev->disk->disk_name);
630 +                               goto error_out;
631 +                       }
632 +               }
633 +
634 +               req->errors = 0;
635 +               spin_unlock_irq(q->queue_lock); /* dropped while queueing and sending; gnbd_send_req() takes a semaphore */
636 +
637 +               spin_lock(&dev->queue_lock);
638 +
639 +               if (list_empty(&dev->queue_head))
640 +                       dev->last_received = jiffies;   /* start the wait clock when the queue goes non-empty */
641 +               list_add(&req->queuelist, &dev->queue_head);
642 +               spin_unlock(&dev->queue_lock);
643 +
644 +               err = gnbd_send_req(dev, req);
645 +
646 +               spin_lock_irq(q->queue_lock);
647 +               if (err)
648 +                       goto sock_error;
649 +               continue;
650 +
651 +error_out:
652 +               req->errors++;
653 +               spin_unlock(q->queue_lock);     /* gnbd_end_request() takes q->queue_lock itself */
654 +               gnbd_end_request(req);
655 +               spin_lock(q->queue_lock);
656 +       }
657 +       return;
658 +
659 +sock_error:
660 +       return; /* the failed request stays on dev->queue_head */
661 +}
662 +
663 +/*
664 + * Re-send every request on dev->queue_head over `sock`; returns 0 or the first send error.
665 + * This is called before dev->sock is set, so you don't need to worry about the tx_lock or the queue_lock
666 + */
667 +static int gnbd_resend_requests(struct gnbd_device *dev, struct socket *sock)
668 +{
669 +       int err = 0;
670 +       struct request *req;
671 +       struct list_head *tmp;
672 +       
673 +       printk("resending requests\n");
674 +       list_for_each(tmp, &dev->queue_head) {
675 +               req = list_entry(tmp, struct request, queuelist);
676 +               err = __gnbd_send_req(dev, sock, req);
677 +
678 +               if (err){
679 +                       printk("failed trying to resend request (%d)\n", err);
680 +                       break;
681 +               }
682 +       }
683 +
684 +       return err;
685 +}
686 +/*
687 +static int get_server_info(struct gnbd_device *dev, struct socket *sock)
688 +{
689 +       struct sockaddr_in server;
690 +       int len;
691 +       int err;
692 +
693 +       err = sock->ops->getname(sock, (struct sockaddr *) &server, &len, 1);
694 +       if (err) {
695 +               printk(KERN_WARNING "cannot get socket info, shutting down\n");
696 +       } else{
697 +               dev->server_addr = server.sin_addr;
698 +               dev->server_port = server.sin_port;
699 +       }
700 +       return err;
701 +}
702 +*/
703 +/* ioctl handler of the gnbd_ctl misc device (CAP_SYS_ADMIN only). arg is a device minor, except for GNBD_DO_IT and GNBD_GET_TIME where it is a user pointer. */
704 +static int gnbd_ctl_ioctl(struct inode *inode, struct file *file,
705 +                    unsigned int cmd, unsigned long arg)
706 +{
707 +       struct gnbd_device *dev = NULL;
708 +       struct block_device *bdev;
709 +        do_it_req_t req;
710 +       int error;
711 +
712 +       if (!capable(CAP_SYS_ADMIN))
713 +               return -EPERM;
714 +
715 +       if (cmd == GNBD_DISCONNECT || cmd == GNBD_CLEAR_QUE ||
716 +                        cmd == GNBD_PING || cmd == GNBD_PRINT_DEBUG) {
717 +                if (arg >= MAX_GNBD)
718 +                        return -EINVAL;
719 +                dev = &gnbd_dev[arg];
720 +                BUG_ON(dev->magic != GNBD_MAGIC);
721 +        }
722 +       /* dev is still NULL here for GNBD_DO_IT, GNBD_GET_TIME and unknown commands, so it must not be dereferenced */
723 +       /* Anyone capable of this syscall can do *real bad* things */
724 +       dprintk(DBG_IOCTL, "%s: gnbd_ioctl cmd=%s(0x%x) arg=%lu\n",
725 +                       dev ? dev->disk->disk_name : "gnbd_ctl", ioctl_cmd_to_ascii(cmd), cmd, arg);
726 +
727 +       switch (cmd) {
728 +       case GNBD_DISCONNECT:   /* refused while the device is open; otherwise sends shutdown_req over dev->sock */
729 +               printk(KERN_INFO "%s: GNBD_DISCONNECT\n", dev->disk->disk_name);
730 +               spin_lock(&dev->open_lock);
731 +               if (dev->open_count > 0){
732 +                       spin_unlock(&dev->open_lock);
733 +                       return -EBUSY;
734 +               }
735 +               dev->receiver_pid = -1;
736 +               spin_unlock(&dev->open_lock);
737 +               /* There is no one using the device, you can disconnect it */
738 +               if (dev->sock == NULL)
739 +                       return -ENOTCONN;
740 +               gnbd_send_req(dev, &shutdown_req);
741 +                return 0;
742 +       case GNBD_CLEAR_QUE:    /* fail everything queued, then flush and invalidate the block device if it is open */
743 +               if (down_interruptible(&dev->do_it_lock))
744 +                       return -EBUSY;
745 +               dev->receiver_pid = -1;
746 +               gnbd_clear_que(dev);
747 +               bdev = dev->bdev;
748 +               if (bdev) {
749 +                       blk_run_queue(dev->disk->queue);
750 +                       fsync_bdev(bdev);
751 +                       invalidate_bdev(bdev, 0);
752 +               }
753 +               up(&dev->do_it_lock);
754 +               return 0;
755 +       case GNBD_DO_IT:        /* adopt the caller's socket and run the receive loop in this task until it ends */
756 +               if (copy_from_user(&req, (do_it_req_t *)arg, sizeof(req)))
757 +                        return -EFAULT;
758 +                if (req.minor >= MAX_GNBD)     /* same bound as the array and as the arg check above */
759 +                        return -EINVAL;
760 +                dev = &gnbd_dev[req.minor];
761 +                BUG_ON(dev->magic != GNBD_MAGIC);
762 +               if (dev->file)
763 +                       return -EBUSY;
764 +               error = -EINVAL;
765 +               file = fget(req.sock_fd);
766 +               if (!file)
767 +                       return error;
768 +               inode = file->f_dentry->d_inode;
769 +               if (!inode->i_sock) {
770 +                       fput(file);
771 +                       return error;
772 +               }
773 +               if (down_trylock(&dev->do_it_lock)){
774 +                       fput(file);
775 +                       return -EBUSY;
776 +               }
777 +               error = gnbd_resend_requests(dev, SOCKET_I(inode));
778 +               if (error){
779 +                       printk("quitting NBD_DO_IT\n");
780 +                       up(&dev->do_it_lock);
781 +                       fput(file);
782 +                       return error;
783 +               }
784 +               dev->file = file;
785 +               dev->sock = SOCKET_I(inode);
786 +               dev->receiver_pid = current->pid; 
787 +               blk_run_queue(dev->disk->queue);
788 +               error = gnbd_do_it(dev);
789 +               /* should I kill the socket first */
790 +               up(&dev->do_it_lock);
791 +               down(&dev->tx_lock);    /* tx_lock keeps senders away from dev->sock while it is torn down */
792 +               if (dev->sock) {
793 +                       printk(KERN_WARNING "%s: shutting down socket\n",
794 +                                       dev->disk->disk_name);
795 +                       dev->sock->ops->shutdown(dev->sock,
796 +                                       SEND_SHUTDOWN|RCV_SHUTDOWN);
797 +                       dev->sock = NULL;
798 +               }
799 +               up(&dev->tx_lock);
800 +               file = dev->file;
801 +               dev->file = NULL;
802 +               if (file)
803 +                       fput(file);
804 +               printk("exitting GNBD_DO_IT ioctl\n");
805 +               return error;
806 +       case GNBD_PING:
807 +               /* FIXME -- should I allow pings if everything is compeletely
808 +                * shutdown */
809 +               spin_lock(&dev->queue_lock);
810 +               /* only one outstanding ping at a time */
811 +               if (list_empty(&ping_req.queuelist)){
812 +                       if (list_empty(&dev->queue_head))
813 +                               dev->last_received = jiffies;
814 +                       list_add(&ping_req.queuelist, &dev->queue_head);
815 +               }
816 +               spin_unlock(&dev->queue_lock);
817 +               gnbd_send_req(dev, &ping_req); /* ignore the errors */
818 +               return 0;
819 +       case GNBD_PRINT_DEBUG:
820 +               printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
821 +                       dev->disk->disk_name,
822 +                       dev->queue_head.next, dev->queue_head.prev,
823 +                       &dev->queue_head);
824 +               return 0;
825 +       case GNBD_GET_TIME:     /* copy insmod_time to the user buffer at arg */
826 +               if (copy_to_user((void *)arg, &insmod_time, sizeof(uint64_t))){
827 +                       printk(KERN_WARNING "couldn't compy time argument to user\n");
828 +                       return -EFAULT;
829 +               }
830 +               return 0;
831 +       }
832 +       /* FIXME -- should I print something, is EINVAL the right error */
833 +       return -EINVAL;
834 +}
835 +
836 +static int gnbd_open(struct inode *inode, struct file *file) /* .open hook of gnbd_fops; returns 0, -ENXIO or -EROFS */
837 +{
838 +       struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data;
839 +       spin_lock(&dev->open_lock);
840 +       if (dev->receiver_pid == -1){ /* -1 is the value gnbd_init() stores: no receiver is attached */
841 +               spin_unlock(&dev->open_lock);
842 +               return -ENXIO;
843 +       }
844 +       spin_unlock(&dev->open_lock);
845 +       if ((file->f_mode & FMODE_WRITE) && (dev->flags & GNBD_READ_ONLY)){ /* writable open of a read-only device */
846 +               printk(KERN_INFO "cannot open read only gnbd device read/write\n"); /* newline terminates the log record */
847 +               return -EROFS;
848 +       }
849 +
850 +       dev->open_count++; /* NOTE(review): updated after open_lock was dropped -- confirm opens are serialized elsewhere */
851 +       dev->bdev = inode->i_bdev; /* remembered until the last release clears it */
852 +       return 0;
853 +}
854 +
855 +/* FIXME -- the device is not synced at close. If you write something,
856 + * close the device, and expect the data to have been written by then,
857 + * you are wrong. This might cause problems. */
858 +static int gnbd_release(struct inode *inode, struct file *file) /* .release hook of gnbd_fops; always returns 0 */
859 +{
860 +       struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data;
861 +
862 +       dev->open_count--; /* NOTE(review): no lock is taken here -- confirm releases are serialized elsewhere */
863 +       if (dev->open_count == 0) /* last opener gone: forget the block_device saved by gnbd_open() */
864 +               dev->bdev = NULL;
865 +       return 0;
866 +}
867 +
868 +static struct file_operations _gnbd_ctl_fops = /* file operations of the control device; only ioctl is provided */
869 +{
870 +        .ioctl = gnbd_ctl_ioctl,
871 +        .owner = THIS_MODULE,
872 +};
873 +
874 +static struct miscdevice _gnbd_misc = /* misc device "gnbd_ctl", registered by gnbd_ctl_init() */
875 +{
876 +        .minor = MISC_DYNAMIC_MINOR, /* no fixed minor is requested */
877 +        .name  = "gnbd_ctl",
878 +        .devfs_name = "gnbd_ctl",
879 +        .fops = &_gnbd_ctl_fops
880 +};
881 +
882 +/* Register the gnbd_ctl misc device. FIXME -- I should probably do more here */
883 +int __init gnbd_ctl_init(void) /* returns 0 on success, else the error from misc_register() */
884 +{
885 +        int err;
886 +        
887 +        err = misc_register(&_gnbd_misc);
888 +        if (err) {
889 +                printk("cannot register control device\n");
890 +                return err;
891 +        }
892 +        return 0;
893 +}
894 +
895 +void gnbd_ctl_cleanup(void) /* undo gnbd_ctl_init(); a failure is only logged */
896 +{
897 +        if (misc_deregister(&_gnbd_misc) < 0)
898 +                printk("cannot deregister control device\n");
899 +}
900 +
901 +static struct block_device_operations gnbd_fops = /* installed as disk->fops on every gnbd disk by gnbd_init() */
902 +{
903 +       .open =         gnbd_open,
904 +       .release =      gnbd_release,
905 +       .owner =        THIS_MODULE,
906 +};
907 +
908 +/*
909 + * And here should be modules and kernel interface 
910 + *  (Just smiley confuses emacs :-)
911 + */
912 +
913 +static int __init gnbd_init(void) /* module init; returns 0 or a negative errno after unwinding what was set up */
914 +{
915 +       int err = -ENOMEM;
916 +       struct timeval tv;
917 +       int i;
918 +
919 +       if (sizeof(struct gnbd_request) != 28) { /* 4 + 4 + 8 + 8 + 4 bytes: the packed request layout */
920 +               printk(KERN_CRIT "gnbd: sizeof gnbd_request needs to be 28 in order to work!\n" );
921 +               return -EIO;
922 +       }
923 +       shutdown_req.flags = REQ_SPECIAL; /* prepare the shared disconnect request */
924 +       gnbd_cmd(&shutdown_req) = GNBD_CMD_DISC;
925 +       shutdown_req.sector = 0;
926 +       shutdown_req.nr_sectors = 0;
927 +
928 +       ping_req.flags = REQ_SPECIAL; /* prepare the shared ping request */
929 +       gnbd_cmd(&ping_req) = GNBD_CMD_PING;
930 +       ping_req.sector = 0;
931 +       ping_req.nr_sectors = 0;
932 +       
933 +       for (i = 0; i < MAX_GNBD; i++) { /* pass 1: allocate a gendisk and a queue per device */
934 +               struct gendisk *disk = alloc_disk(1);
935 +               if (!disk)
936 +                       goto out; /* err is still -ENOMEM; "out" frees devices 0..i-1 */
937 +               gnbd_dev[i].disk = disk;
938 +               /*
939 +                * The new linux 2.5 block layer implementation requires
940 +                * every gendisk to have its very own request_queue struct.
941 +                * These structs are big so we dynamically allocate them.
942 +                */
943 +               disk->queue = blk_init_queue(do_gnbd_request, &gnbd_lock);
944 +               if (!disk->queue) {
945 +                       put_disk(disk); /* this disk has no queue yet, so drop it here */
946 +                       goto out;
947 +               }
948 +       }
949 +       major_nr = register_blkdev(major_nr, "gnbd");
950 +       if (major_nr < 0) {
951 +               printk("gnbd: unable to get a major number\n");
952 +               err = major_nr;
953 +               goto out; /* i == MAX_GNBD here, so every disk is released */
954 +       }
955 +
956 +       printk(KERN_INFO "gnbd: registered device at major %d\n", major_nr);
957 +       dprintk(DBG_INIT, "gnbd: debugflags=0x%x\n", debugflags);
958 +
959 +       devfs_mk_dir("gnbd_minor"); /* NOTE(review): the error paths below never remove this directory */
960 +       err = class_register(&gnbd_class);
961 +       if (err)
962 +               goto out_unregister;
963 +       for (i = 0; i < MAX_GNBD; i++) { /* pass 2: reset per-device state and publish each device */
964 +               struct gendisk *disk = gnbd_dev[i].disk;
965 +               gnbd_dev[i].file = NULL;
966 +               gnbd_dev[i].magic = GNBD_MAGIC;
967 +               gnbd_dev[i].flags = 0;
968 +               gnbd_dev[i].open_count = 0;
969 +               gnbd_dev[i].receiver_pid = -1; /* gnbd_open() returns -ENXIO while this is -1 */
970 +               gnbd_dev[i].server_name = NULL;
971 +               gnbd_dev[i].server_port = 0;
972 +               gnbd_dev[i].name[0] = '\0';
973 +               gnbd_dev[i].bdev = NULL;
974 +               spin_lock_init(&gnbd_dev[i].queue_lock);
975 +               spin_lock_init(&gnbd_dev[i].open_lock);
976 +               INIT_LIST_HEAD(&gnbd_dev[i].queue_head);
977 +               init_MUTEX(&gnbd_dev[i].tx_lock);
978 +               init_MUTEX(&gnbd_dev[i].do_it_lock);
979 +               gnbd_dev[i].class_dev.class = &gnbd_class;
980 +               sprintf(gnbd_dev[i].class_dev.class_id, "gnbd%d", i);
981 +               err = class_device_register(&gnbd_dev[i].class_dev);
982 +               if (err){
983 +                       printk("class_device_register failed with %d\n", err);
984 +                       goto out_unregister_class;
985 +               }
986 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev, /* keep the error code: init must not return 0 after unwinding */
987 +                                       &class_device_attr_pid)))
988 +                       goto out_remove_file;
989 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
990 +                                       &class_device_attr_server)))
991 +                       goto out_remove_file;
992 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
993 +                                       &class_device_attr_name)))
994 +                       goto out_remove_file;
995 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
996 +                                       &class_device_attr_sectors)))
997 +                       goto out_remove_file;
998 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
999 +                                       &class_device_attr_usage)))
1000 +                       goto out_remove_file;
1001 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
1002 +                                       &class_device_attr_flags)))
1003 +                       goto out_remove_file;
1004 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
1005 +                                       &class_device_attr_waittime)))
1006 +                       goto out_remove_file;
1007 +               if((err = class_device_create_file(&gnbd_dev[i].class_dev,
1008 +                                       &class_device_attr_connected)))
1009 +                       goto out_remove_file;
1010 +               disk->major = major_nr;
1011 +               disk->first_minor = i;
1012 +               disk->fops = &gnbd_fops;
1013 +               disk->private_data = &gnbd_dev[i];
1014 +               sprintf(disk->disk_name, "gnbd%d", i);
1015 +               sprintf(disk->devfs_name, "gnbd_minor/%d", i);
1016 +               set_capacity(disk, 0);
1017 +               add_disk(disk);
1018 +               if((err = sysfs_create_link(&gnbd_dev[i].class_dev.kobj, /* same reason: propagate the failure code */
1019 +                                       &gnbd_dev[i].disk->kobj, "block")))
1020 +                       goto out_remove_disk;
1021 +               
1022 +       }
1023 +
1024 +        err = gnbd_ctl_init();
1025 +        if (err)
1026 +                goto out_unregister_class;
1027 +       do_gettimeofday(&tv); /* tv was previously read uninitialized */
1028 +       insmod_time = (uint64_t) tv.tv_sec * 1000000 + tv.tv_usec; /* load time in microseconds */
1029 +
1030 +       return 0;
1031 +out_remove_disk: /* device i: disk added, sysfs link failed */
1032 +       del_gendisk(gnbd_dev[i].disk);
1033 +out_remove_file: /* device i: class device registered, not fully set up */
1034 +       class_device_unregister(&gnbd_dev[i].class_dev);
1035 +out_unregister_class: /* devices 0..i-1 were fully published */
1036 +       while(i--){
1037 +               del_gendisk(gnbd_dev[i].disk);
1038 +               class_device_unregister(&gnbd_dev[i].class_dev);
1039 +       }
1040 +       i = MAX_GNBD; /* all disks were allocated in pass 1, so "out" must free all of them */
1041 +       class_unregister(&gnbd_class);
1042 +out_unregister:
1043 +       unregister_blkdev(major_nr, "gnbd");
1044 +out: /* release the queues and disks of devices 0..i-1 */
1045 +       while (i--) {
1046 +               blk_cleanup_queue(gnbd_dev[i].disk->queue);
1047 +               put_disk(gnbd_dev[i].disk);
1048 +       }
1049 +       return err;
1050 +}
1051 +
1052 +static void __exit gnbd_cleanup(void) /* module exit: tear down everything gnbd_init() set up */
1053 +{
1054 +       int i;
1055 +
1056 +       gnbd_ctl_cleanup(); /* remove the gnbd_ctl control device first */
1057 +       for (i = 0; i < MAX_GNBD; i++) {
1058 +               struct gendisk *disk = gnbd_dev[i].disk;
1059 +               class_device_unregister(&gnbd_dev[i].class_dev);
1060 +               if (disk) {
1061 +                       del_gendisk(disk);
1062 +                       blk_cleanup_queue(disk->queue);
1063 +                       put_disk(disk);
1064 +               }
1065 +               if (gnbd_dev[i].server_name)
1066 +                       kfree(gnbd_dev[i].server_name);
1067 +       }
1068 +       class_unregister(&gnbd_class);
1069 +       devfs_remove("gnbd_minor"); /* must match the devfs_mk_dir("gnbd_minor") in gnbd_init() */
1070 +       unregister_blkdev(major_nr, "gnbd");
1071 +       printk(KERN_INFO "gnbd: unregistered device at major %d\n", major_nr);
1072 +}
1073 +
1074 +module_init(gnbd_init); /* entry point run at module load */
1075 +module_exit(gnbd_cleanup); /* entry point run at module unload */
1076 +
1077 +MODULE_DESCRIPTION("Network Block Device");
1078 +MODULE_LICENSE("GPL");
1079 +
1080 +#ifndef NDEBUG /* the debugflags parameter is only declared when NDEBUG is not defined */
1081 +MODULE_PARM(debugflags, "i");
1082 +MODULE_PARM_DESC(debugflags, "flags for controlling debug output");
1083 +#endif
1084 diff -urN linux-2.6.9/include/linux/gnbd.h linux-2.6.9-patched/include/linux/gnbd.h
1085 --- linux-2.6.9/include/linux/gnbd.h    1969-12-31 18:00:00.000000000 -0600
1086 +++ linux-2.6.9-patched/include/linux/gnbd.h    2004-10-22 13:39:34.000000000 -0500
1087 @@ -0,0 +1,103 @@
1088 +/******************************************************************************
1089 +*******************************************************************************
1090 +**
1091 +**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
1092 +**  Copyright (C) 2004 Red Hat, Inc.  All rights reserved.
1093 +**
1094 +**  This copyrighted material is made available to anyone wishing to use,
1095 +**  modify, copy, or redistribute it subject to the terms and conditions
1096 +**  of the GNU General Public License v.2.
1097 +**
1098 +*******************************************************************************
1099 +******************************************************************************/
1100 +
1101 +#ifndef LINUX_GNBD_H
1102 +#define LINUX_GNBD_H
1103 +
1104 +#define GNBD_DO_IT     _IO( 0xab, 0x20 ) /* attach file/socket to a device and run gnbd_do_it() in the caller */
1105 +#define GNBD_CLEAR_QUE _IO( 0xab, 0x21 ) /* presumably clears the request queue -- TODO confirm against the ioctl handler */
1106 +#define GNBD_PRINT_DEBUG       _IO( 0xab, 0x22 ) /* printk the device's queue_head pointers */
1107 +#define GNBD_DISCONNECT  _IO( 0xab, 0x23 ) /* presumably requests a disconnect -- TODO confirm against the ioctl handler */
1108 +#define GNBD_PING      _IO( 0xab, 0x24 ) /* queue and send the driver's ping request */
1109 +#define GNBD_GET_TIME _IO( 0xab, 0x25 ) /* copies a uint64_t (insmod_time) to the user pointer in arg, although _IO encodes no size */
1110 +
1111 +enum { /* command codes stored in a request via gnbd_cmd() */
1112 +       GNBD_CMD_READ = 0,
1113 +       GNBD_CMD_WRITE = 1,
1114 +       GNBD_CMD_DISC = 2, /* set on the driver's shutdown request */
1115 +       GNBD_CMD_PING = 3 /* set on the driver's ping request */
1116 +};
1117 +
1118 +#define gnbd_cmd(req) ((req)->cmd[0]) /* lvalue: the command code lives in the first element of req->cmd */
1119 +#define MAX_GNBD 128 /* number of gnbd_dev[] entries the driver initializes */
1120 +
1121 +/* values for the flags field of struct gnbd_device */
1122 +#define GNBD_READ_ONLY 0x0001 /* gnbd_open() rejects FMODE_WRITE opens with -EROFS when set */
1123 +
1124 +/* userspace doesn't need the gnbd_device structure */
1125 +#ifdef __KERNEL__
1126 +
1127 +struct gnbd_device { /* per-device state; one entry of gnbd_dev[] per minor */
1128 +       unsigned short int flags; /* GNBD_READ_ONLY, ... */
1129 +       struct socket * sock; /* set by GNBD_DO_IT, reset to NULL after the socket is shut down */
1130 +       struct file * file;     /* If == NULL, device is not ready, yet */
1131 +       int magic; /* initialized to GNBD_MAGIC */
1132 +       spinlock_t queue_lock; /* taken around queue_head updates */
1133 +       spinlock_t open_lock; /* taken by gnbd_open() while it checks receiver_pid */
1134 +       struct list_head queue_head;/* Requests are added here...       */
1135 +       struct semaphore tx_lock; /* held while the socket is shut down and cleared */
1136 +       struct gendisk *disk;
1137 +       pid_t receiver_pid; /* pid of the task inside GNBD_DO_IT; initialized to -1 */
1138 +       struct semaphore do_it_lock; /* released when GNBD_DO_IT finishes */
1139 +       int open_count; /* incremented in gnbd_open(), decremented in gnbd_release() */
1140 +       struct class_device class_dev; /* registered as "gnbd%d" in gnbd_class */
1141 +       unsigned short int server_port;
1142 +       char *server_name; /* kfree()d at module exit when non-NULL */
1143 +       char name[32];
1144 +       unsigned long last_received; /* jiffies; refreshed when a ping is queued on an empty queue */
1145 +       struct block_device *bdev; /* set on open, cleared when open_count drops to 0 */
1146 +};
1147 +
1148 +#endif /* __KERNEL__ */
1149 +
1150 +/* These are sent over the network in the request/reply magic fields */
1151 +
1152 +#define GNBD_REQUEST_MAGIC 0x37a07e00
1153 +#define GNBD_REPLY_MAGIC 0x41f09370
1154 +#define GNBD_KEEP_ALIVE_MAGIC 0x5B46D8C2
1155 +/* Do *not* use magics: 0x12560953 0x96744668. */
1156 +
1157 +/*
1158 + * This is the packet used for communication between client and
1159 + * server. All data are in network byte order.
1160 + */
1161 +struct gnbd_request { /* must stay 28 bytes: gnbd_init() refuses to load otherwise */
1162 +       uint32_t magic;
1163 +       uint32_t type;  /* == READ || == WRITE  why so long */
1164 +       char handle[8];  /* why is this a char array instead of a u64 */
1165 +       uint64_t from;
1166 +       uint32_t len;
1167 +}
1168 +#ifdef __GNUC__
1169 +       __attribute__ ((packed)) /* no padding, so the fields add up to 4 + 4 + 8 + 8 + 4 = 28 bytes */
1170 +#endif /* __GNUC__ */
1171 +;
1172 +
1173 +/*
1174 + * This is the reply packet that gnbd-server sends back to the client after
1175 + * it has completed an I/O request (or an error occurs).
1176 + */
1177 +#define SIZE_OF_REPLY 16 /* 4 + 4 + 8: the summed field sizes of struct gnbd_reply */
1178 +struct gnbd_reply {
1179 +       uint32_t magic;
1180 +       uint32_t error;         /* 0 = ok, else error   */
1181 +       char handle[8];         /* handle you got from request  */
1182 +};
1183 +
1184 +struct do_it_req_s { /* presumably the argument block of GNBD_DO_IT -- TODO confirm against the ioctl handler */
1185 +        unsigned int minor;
1186 +        int sock_fd;
1187 +};
1188 +typedef struct do_it_req_s do_it_req_t;
1189 +
1190 +#endif /* LINUX_GNBD_H */
This page took 0.117227 seconds and 3 git commands to generate.