]> git.pld-linux.org Git - packages/kernel.git/blame - linux-cluster-gnbd.patch
- raw http://vserver.13thfloor.at/Experimental/patch-2.6.10-vs1.9.3.17.diff
[packages/kernel.git] / linux-cluster-gnbd.patch
CommitLineData
c783755a
AM
1diff -urN linux-2.6.9/drivers/block/Kconfig linux-2.6.9-patched/drivers/block/Kconfig
2--- linux-2.6.9/drivers/block/Kconfig 2004-10-18 16:53:43.000000000 -0500
3+++ linux-2.6.9-patched/drivers/block/Kconfig 2004-10-22 13:27:52.938836304 -0500
4@@ -356,6 +356,13 @@
c1c6733f
AM
5 your machine, or if you want to have a raid or loopback device
6 bigger than 2TB. Otherwise say N.
7
8+config BLK_DEV_GNBD
9+ tristate "Global network block device support"
10+ depends on NET
11+ ---help---
12+
13+ If unsure, say N.
14+
15 source "drivers/s390/block/Kconfig"
16
17 endmenu
c783755a
AM
18diff -urN linux-2.6.9/drivers/block/Makefile linux-2.6.9-patched/drivers/block/Makefile
19--- linux-2.6.9/drivers/block/Makefile 2004-10-18 16:54:55.000000000 -0500
20+++ linux-2.6.9-patched/drivers/block/Makefile 2004-10-22 13:30:48.224188880 -0500
21@@ -43,4 +43,4 @@
c1c6733f
AM
22 obj-$(CONFIG_VIODASD) += viodasd.o
23 obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
b7b72b66 24 obj-$(CONFIG_BLK_DEV_UB) += ub.o
c1c6733f
AM
25-
26+obj-$(CONFIG_BLK_DEV_GNBD) += gnbd.o
c783755a
AM
27diff -urN linux-2.6.9/drivers/block/gnbd.c linux-2.6.9-patched/drivers/block/gnbd.c
28--- linux-2.6.9/drivers/block/gnbd.c 1969-12-31 18:00:00.000000000 -0600
29+++ linux-2.6.9-patched/drivers/block/gnbd.c 2004-10-22 13:43:33.303879088 -0500
30@@ -0,0 +1,1053 @@
c1c6733f
AM
31+/******************************************************************************
32+*******************************************************************************
33+**
34+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
35+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
36+**
37+** This copyrighted material is made available to anyone wishing to use,
38+** modify, copy, or redistribute it subject to the terms and conditions
39+** of the GNU General Public License v.2.
40+**
41+*******************************************************************************
42+******************************************************************************/
43+
44+/* Large chunks of this code were lifted from nbd.c */
45+
46+#include <linux/major.h>
47+
48+#include <linux/blkdev.h>
49+#include <linux/module.h>
50+#include <linux/init.h>
51+#include <linux/sched.h>
52+#include <linux/fs.h>
53+#include <linux/bio.h>
54+#include <linux/stat.h>
55+#include <linux/errno.h>
56+#include <linux/file.h>
57+#include <linux/ioctl.h>
58+#include <net/sock.h>
59+#include <linux/in.h>
60+#include <linux/buffer_head.h>
61+#include <linux/miscdevice.h>
62+
63+#include <linux/devfs_fs_kernel.h>
64+
65+#include <asm/uaccess.h>
66+#include <asm/types.h>
67+
68+#include <linux/gnbd.h>
69+
70+static int major_nr = 0;
71+uint64_t insmod_time;
72+
73+
74+#define GNBD_MAGIC 0x74d06100
75+
76+#ifdef NDEBUG
77+#define dprintk(flags, fmt...)
78+#else /* NDEBUG */
79+#define dprintk(flags, fmt...) do { \
80+ if (debugflags & (flags)) printk(KERN_DEBUG fmt); \
81+} while (0)
82+#define DBG_IOCTL 0x0004
83+#define DBG_INIT 0x0010
84+#define DBG_EXIT 0x0020
85+#define DBG_BLKDEV 0x0100
86+#define DBG_RX 0x0200
87+#define DBG_TX 0x0400
88+static unsigned int debugflags;
89+#endif /* NDEBUG */
90+
91+static struct gnbd_device gnbd_dev[MAX_GNBD];
92+
93+struct request shutdown_req;
94+struct request ping_req;
95+
96+static spinlock_t gnbd_lock = SPIN_LOCK_UNLOCKED;
97+
98+#define to_gnbd_dev(d) container_of(d, struct gnbd_device, class_dev)
99+
/*
 * Release hook installed in gnbd_class.  It only logs a message; no
 * memory is freed here.
 */
static void gnbd_class_release(struct class_device *class_dev)
{
	/* FIXME -- What the hell do I have to free up here */
	printk("releasing gnbd class\n");
}
105+
106+static struct class gnbd_class = {
107+ .name = "gnbd",
108+ .release = gnbd_class_release
109+};
110+
111+
112+static ssize_t show_pid(struct class_device *class_dev, char *buf)
113+{
114+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
115+ return sprintf(buf, "%d\n", dev->receiver_pid);
116+}
117+
118+static CLASS_DEVICE_ATTR(pid, S_IRUGO, show_pid, NULL);
119+
120+static ssize_t show_server(struct class_device *class_dev, char *buf)
121+{
122+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
6461ecac
AM
123+ if (dev->server_name)
124+ return sprintf(buf, "%s/%hx\n", dev->server_name,
125+ dev->server_port);
126+ else
127+ return sprintf(buf, "\n");
c1c6733f
AM
128+}
129+
6461ecac 130+/* FIXME -- should an empty store free the memory? */
c1c6733f
AM
131+static ssize_t store_server(struct class_device *class_dev,
132+ const char *buf, size_t count)
133+{
134+ int res;
c1c6733f 135+ short unsigned int port;
6461ecac 136+ char *ptr;
c1c6733f
AM
137+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
138+ if (down_trylock(&dev->do_it_lock))
139+ return -EBUSY;
6461ecac
AM
140+ if (dev->server_name)
141+ kfree(dev->server_name);
142+ dev->server_name = kmalloc(count + 1, GFP_KERNEL);
143+ if (!dev->server_name)
144+ return -ENOMEM;
145+ memcpy(dev->server_name, buf, count);
146+ dev->server_name[count] = 0;
147+ ptr = strchr(dev->server_name, '/');
148+ if (!ptr)
149+ return -EINVAL;
150+ *ptr++ = 0;
151+ res = sscanf(ptr, "%4hx", &port);
152+ if (res != 1){
c1c6733f
AM
153+ up(&dev->do_it_lock);
154+ return -EINVAL;
155+ }
c1c6733f
AM
156+ dev->server_port = port;
157+ up(&dev->do_it_lock);
158+ return count;
159+}
160+
161+CLASS_DEVICE_ATTR(server, S_IRUGO | S_IWUSR, show_server, store_server);
162+
163+static ssize_t show_name(struct class_device *class_dev, char *buf)
164+{
165+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
166+ return sprintf(buf, "%s\n", dev->name);
167+}
168+
169+static ssize_t store_name(struct class_device *class_dev,
170+ const char *buf, size_t count)
171+{
172+ int res;
173+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
174+ if (down_trylock(&dev->do_it_lock))
175+ return -EBUSY;
176+ res = sscanf(buf, "%31s", dev->name);
177+ up(&dev->do_it_lock);
178+ if (res != 1)
179+ return -EINVAL;
180+ return count;
181+}
182+
183+CLASS_DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_name, store_name);
184+
185+
186+static ssize_t show_sectors(struct class_device *class_dev, char *buf)
187+{
188+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
189+ return sprintf(buf, "%Lu\n",
190+ (unsigned long long)get_capacity(dev->disk));
191+}
192+
193+static ssize_t store_sectors(struct class_device *class_dev,
194+ const char *buf, size_t count)
195+{
196+ int res;
197+ sector_t size;
198+ struct block_device *bdev;
199+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
200+
201+ if (down_trylock(&dev->do_it_lock))
202+ return -EBUSY;
203+ res = sscanf(buf, "%Lu\n", &size);
204+ if (res != 1){
205+ up(&dev->do_it_lock);
206+ return -EINVAL;
207+ }
208+ /* FIXME -- should I switch the order here, so that I don't have
209+ capacity set to one thing and the bdev inode size set to another */
210+ set_capacity(dev->disk, size);
211+ bdev = bdget_disk(dev->disk, 0);
212+ if (bdev) {
213+ down(&bdev->bd_inode->i_sem);
214+ i_size_write(bdev->bd_inode, (loff_t)size << 9);
215+ up(&bdev->bd_inode->i_sem);
216+ bdput(bdev);
217+ }
218+ up(&dev->do_it_lock);
219+ return count;
220+}
221+
222+CLASS_DEVICE_ATTR(sectors, S_IRUGO | S_IWUSR, show_sectors, store_sectors);
223+
224+static ssize_t show_usage(struct class_device *class_dev, char *buf)
225+{
226+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
227+ return sprintf(buf, "%d\n", dev->open_count);
228+}
229+
230+CLASS_DEVICE_ATTR(usage, S_IRUGO, show_usage, NULL);
231+
232+static ssize_t show_flags(struct class_device *class_dev, char *buf)
233+{
234+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
235+ return sprintf(buf, "0x%04x\n", dev->flags);
236+}
237+
238+static ssize_t store_flags(struct class_device *class_dev,
239+ const char *buf, size_t count)
240+{
241+ int res;
242+
243+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
244+ if (down_trylock(&dev->do_it_lock))
245+ return -EBUSY;
246+ res = sscanf(buf, "0x%hx", &dev->flags);
247+ up(&dev->do_it_lock);
248+ if (res != 1)
249+ return -EINVAL;
250+ return count;
251+}
252+
253+
254+CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags);
255+
256+static ssize_t show_waittime(struct class_device *class_dev, char *buf)
257+{
258+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
259+ if (list_empty(&dev->queue_head))
260+ return sprintf(buf, "-1\n");
261+ return sprintf(buf, "%ld\n",
262+ ((long)jiffies - (long)dev->last_received) / HZ);
263+}
264+
265+CLASS_DEVICE_ATTR(waittime, S_IRUGO, show_waittime, NULL);
266+
267+static ssize_t show_connected(struct class_device *class_dev, char *buf)
268+{
269+ struct gnbd_device *dev = to_gnbd_dev(class_dev);
270+ return sprintf(buf, "%d\n", (dev->sock != NULL));
271+}
272+
273+CLASS_DEVICE_ATTR(connected, S_IRUGO, show_connected, NULL);
274+
275+#ifndef NDEBUG
276+static const char *ioctl_cmd_to_ascii(int cmd)
277+{
278+ switch (cmd) {
279+ case GNBD_DO_IT: return "do-it";
280+ case GNBD_CLEAR_QUE: return "clear-que";
281+ case GNBD_PRINT_DEBUG: return "print-debug";
282+ case GNBD_DISCONNECT: return "disconnect";
283+ }
284+ return "unknown";
285+}
286+
287+static const char *gnbdcmd_to_ascii(int cmd)
288+{
289+ switch (cmd) {
290+ case GNBD_CMD_READ: return "read";
291+ case GNBD_CMD_WRITE: return "write";
292+ case GNBD_CMD_DISC: return "disconnect";
293+ case GNBD_CMD_PING: return "ping";
294+ }
295+ return "invalid";
296+}
297+#endif /* NDEBUG */
298+
299+static void gnbd_end_request(struct request *req)
300+{
301+ int uptodate = (req->errors == 0) ? 1 : 0;
302+ request_queue_t *q = req->q;
c1c6733f
AM
303+ unsigned long flags;
304+
305+ dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name,
306+ req, uptodate? "done": "failed");
307+
308+ if (!uptodate)
309+ printk("%s %d called gnbd_end_request with and error\n",
310+ current->comm, current->pid);
311+
c1c6733f
AM
312+ spin_lock_irqsave(q->queue_lock, flags);
313+ if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
314+ end_that_request_last(req);
315+ }
316+ spin_unlock_irqrestore(q->queue_lock, flags);
317+}
318+
319+/*
320+ * Send or receive packet.
321+ */
322+static int sock_xmit(struct socket *sock, int send, void *buf, int size,
323+ int msg_flags)
324+{
325+ mm_segment_t oldfs;
326+ int result;
327+ struct msghdr msg;
328+ struct iovec iov;
329+ unsigned long flags;
330+ sigset_t oldset;
331+
332+ oldfs = get_fs();
333+ set_fs(get_ds());
334+ /* Allow interception of SIGKILL only
335+ * Don't allow other signals to interrupt the transmission */
336+ spin_lock_irqsave(&current->sighand->siglock, flags);
337+ oldset = current->blocked;
338+ sigfillset(&current->blocked);
b7b72b66
AM
339+ sigdelsetmask(&current->blocked, sigmask(SIGKILL) | sigmask(SIGTERM) |
340+ sigmask(SIGHUP));
c1c6733f
AM
341+ recalc_sigpending();
342+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
343+
344+ do {
345+ sock->sk->sk_allocation = GFP_NOIO;
346+ iov.iov_base = buf;
347+ iov.iov_len = size;
348+ msg.msg_name = NULL;
349+ msg.msg_namelen = 0;
350+ msg.msg_iov = &iov;
351+ msg.msg_iovlen = 1;
352+ msg.msg_control = NULL;
353+ msg.msg_controllen = 0;
354+ msg.msg_namelen = 0;
355+ msg.msg_flags = msg_flags | MSG_NOSIGNAL;
356+
357+ if (send)
358+ result = sock_sendmsg(sock, &msg, size);
359+ else
360+ result = sock_recvmsg(sock, &msg, size, 0);
361+
362+ if (signal_pending(current)) {
363+ siginfo_t info;
364+ spin_lock_irqsave(&current->sighand->siglock, flags);
365+ printk(KERN_WARNING "gnbd (pid %d: %s) got signal %d\n",
366+ current->pid, current->comm,
367+ dequeue_signal(current, &current->blocked, &info));
368+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
369+ result = -EINTR;
370+ break;
371+ }
372+
373+ if (result <= 0) {
374+ if (result == 0)
375+ result = -EPIPE; /* short read */
376+ break;
377+ }
378+ size -= result;
379+ buf += result;
380+ } while (size > 0);
381+
382+ spin_lock_irqsave(&current->sighand->siglock, flags);
383+ current->blocked = oldset;
384+ recalc_sigpending();
385+ spin_unlock_irqrestore(&current->sighand->siglock, flags);
386+
387+ set_fs(oldfs);
388+ return result;
389+}
390+
391+static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec,
392+ int flags)
393+{
394+ int result;
395+ void *kaddr = kmap(bvec->bv_page);
396+ result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len,
397+ flags);
398+ kunmap(bvec->bv_page);
399+ return result;
400+}
401+
402+
403+#define gnbd_send_req(dev, req) __gnbd_send_req((dev), (dev)->sock, (req))
404+
405+int __gnbd_send_req(struct gnbd_device *dev, struct socket *sock,
406+ struct request *req)
407+{
408+ int result, i, flags;
409+ struct gnbd_request request;
410+ unsigned long size = req->nr_sectors << 9;
411+
412+ request.magic = htonl(GNBD_REQUEST_MAGIC);
413+ request.type = htonl(gnbd_cmd(req));
414+ request.from = cpu_to_be64((u64) req->sector << 9);
415+ request.len = htonl(size);
416+ memcpy(request.handle, &req, sizeof(req));
417+
418+ down(&dev->tx_lock);
419+
420+ if (!sock) {
421+ printk(KERN_ERR "%s: Attempted send on closed socket\n",
422+ dev->disk->disk_name);
423+ result = -ENOTCONN;
424+ goto error_out;
425+ }
426+
427+ dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
428+ dev->disk->disk_name, req,
429+ gnbdcmd_to_ascii(gnbd_cmd(req)),
430+ (unsigned long long)req->sector << 9,
431+ req->nr_sectors << 9);
432+ result = sock_xmit(sock, 1, &request, sizeof(request),
433+ (gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0);
434+ if (result < 0) {
435+ printk(KERN_ERR "%s: Send control failed (result %d)\n",
436+ dev->disk->disk_name, result);
437+ goto error_out;
438+ }
439+
440+ if (gnbd_cmd(req) == GNBD_CMD_WRITE) {
441+ struct bio *bio;
442+ /*
443+ * we are really probing at internals to determine
444+ * whether to set MSG_MORE or not...
445+ */
446+ rq_for_each_bio(bio, req) {
447+ struct bio_vec *bvec;
448+ bio_for_each_segment(bvec, bio, i) {
449+ flags = 0;
450+ if ((i < (bio->bi_vcnt - 1)) || bio->bi_next)
451+ flags = MSG_MORE;
452+ dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
453+ dev->disk->disk_name, req,
454+ bvec->bv_len);
455+ result = sock_send_bvec(sock, bvec, flags);
456+ if (result < 0) {
457+ printk(KERN_ERR "%s: Send data failed (result %d)\n",
458+ dev->disk->disk_name,
459+ result);
460+ goto error_out;
461+ }
462+ }
463+ }
464+ }
465+ up(&dev->tx_lock);
466+ return 0;
467+
468+error_out:
469+ up(&dev->tx_lock);
470+ return result;
471+}
472+
473+
474+static int gnbd_find_request(struct gnbd_device *dev, struct request *xreq)
475+{
476+ struct request *req;
477+ struct list_head *tmp;
478+
479+ list_for_each(tmp, &dev->queue_head) {
480+ req = list_entry(tmp, struct request, queuelist);
481+ if (req != xreq)
482+ continue;
483+ return 1;
484+ }
485+ return 0;
486+}
487+
488+static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec)
489+{
490+ int result;
491+ void *kaddr = kmap(bvec->bv_page);
492+ result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len,
493+ MSG_WAITALL);
494+ kunmap(bvec->bv_page);
495+ return result;
496+}
497+
498+int gnbd_recv_req(struct gnbd_device *dev, struct request *req)
499+{
500+ int result;
501+ int i;
502+ struct bio *bio;
503+ rq_for_each_bio(bio, req) {
504+ struct bio_vec *bvec;
505+ bio_for_each_segment(bvec, bio, i) {
506+ result = sock_recv_bvec(dev->sock, bvec);
507+ if (result < 0) {
508+ printk(KERN_ERR "%s: Receive data failed (result %d)\n",
509+ dev->disk->disk_name,
510+ result);
511+ return result;
512+ }
513+ dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
514+ dev->disk->disk_name, req, bvec->bv_len);
515+ }
516+ }
517+ return 0;
518+}
519+
520+int gnbd_do_it(struct gnbd_device *dev)
521+{
522+ int result;
523+ struct gnbd_reply reply;
524+ struct request *req;
525+ struct socket *sock = dev->sock;
526+
527+ BUG_ON(dev->magic != GNBD_MAGIC);
528+
529+ while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL)) > 0){
530+ if (ntohl(reply.magic) == GNBD_KEEP_ALIVE_MAGIC)
531+ /* FIXME -- I should reset the wait time here */
532+ continue;
533+
534+ memcpy(&req, reply.handle, sizeof(req));
535+ if (req == &shutdown_req)
536+ return 0;
537+
538+ if (!gnbd_find_request(dev, req)){
539+ printk(KERN_ERR "%s: Unexpected reply (%p)\n",
540+ dev->disk->disk_name, reply.handle);
541+ return -EBADR;
542+ }
543+ if (ntohl(reply.magic) != GNBD_REPLY_MAGIC) {
544+ printk(KERN_ERR "%s: Wrong magic (0x%lx)\n",
545+ dev->disk->disk_name,
546+ (unsigned long)ntohl(reply.magic));
547+ return -EPROTO;
548+ }
549+ if (ntohl(reply.error)) {
550+ printk(KERN_ERR "%s: Other side returned error (%d)\n",
551+ dev->disk->disk_name, ntohl(reply.error));
552+ req->errors++;
553+ goto remove_req;
554+ }
555+ dprintk(DBG_RX, "%s: request %p: got reply\n",
556+ dev->disk->disk_name, req);
557+
558+ if (gnbd_cmd(req) == GNBD_CMD_READ){
559+ result = gnbd_recv_req(dev, req);
560+ if (result < 0)
561+ return result;
562+ }
563+remove_req:
564+ spin_lock(&dev->queue_lock);
565+ list_del_init(&req->queuelist);
566+ dev->last_received = jiffies;
567+ spin_unlock(&dev->queue_lock);
568+ if (req != &ping_req)
569+ gnbd_end_request(req);
570+ }
571+ printk(KERN_ERR "%s: Receive control failed (result %d)\n",
572+ dev->disk->disk_name, result);
573+ return result;
574+}
575+
576+void gnbd_clear_que(struct gnbd_device *dev)
577+{
578+ struct request *req;
579+
580+ BUG_ON(dev->magic != GNBD_MAGIC);
581+
582+ do {
583+ req = NULL;
584+ if (!list_empty(&dev->queue_head)) {
585+ req = list_entry(dev->queue_head.next, struct request, queuelist);
586+ list_del_init(&req->queuelist);
587+ }
588+ if (req && req != &ping_req) {
589+ req->errors++;
590+ gnbd_end_request(req);
591+ }
592+ } while (req);
593+}
594+
595+/*
596+ * We always wait for result of write, for now. It would be nice to make it optional
597+ * in future
598+ * if ((req->cmd == WRITE) && (dev->flags & GNBD_WRITE_NOCHK))
599+ * { printk( "Warning: Ignoring result!\n"); gnbd_end_request( req ); }
600+ */
601+
602+static void do_gnbd_request(request_queue_t * q)
603+{
604+ int err;
605+ struct request *req;
606+
607+ while ((req = elv_next_request(q)) != NULL) {
608+ struct gnbd_device *dev;
609+
610+ blkdev_dequeue_request(req);
611+ dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
612+ req->rq_disk->disk_name, req, req->flags);
613+
614+ if (!(req->flags & REQ_CMD))
615+ goto error_out;
616+
617+ dev = req->rq_disk->private_data;
618+
619+ if (dev->receiver_pid == -1)
620+ goto error_out;
621+
622+ BUG_ON(dev->magic != GNBD_MAGIC);
623+
624+ gnbd_cmd(req) = GNBD_CMD_READ;
625+ if (rq_data_dir(req) == WRITE) {
626+ gnbd_cmd(req) = GNBD_CMD_WRITE;
627+ if (dev->flags & GNBD_READ_ONLY) {
628+ printk(KERN_ERR "%s: Write on read-only\n",
629+ dev->disk->disk_name);
630+ goto error_out;
631+ }
632+ }
633+
634+ req->errors = 0;
635+ spin_unlock_irq(q->queue_lock);
636+
637+ spin_lock(&dev->queue_lock);
638+
639+ if (list_empty(&dev->queue_head))
640+ dev->last_received = jiffies;
641+ list_add(&req->queuelist, &dev->queue_head);
c1c6733f
AM
642+ spin_unlock(&dev->queue_lock);
643+
644+ err = gnbd_send_req(dev, req);
645+
c1c6733f
AM
646+ spin_lock_irq(q->queue_lock);
647+ if (err)
648+ goto sock_error;
649+ continue;
650+
651+error_out:
652+ req->errors++;
653+ spin_unlock(q->queue_lock);
654+ gnbd_end_request(req);
655+ spin_lock(q->queue_lock);
656+ }
657+ return;
658+
659+sock_error:
660+ return;
661+}
662+
663+/*
664+ * This is called before dev->sock is set, so you don't need
665+ * to worry about the tx_lock or the queue_lock
666+ */
667+static int gnbd_resend_requests(struct gnbd_device *dev, struct socket *sock)
668+{
669+ int err = 0;
670+ struct request *req;
671+ struct list_head *tmp;
672+
673+ printk("resending requests\n");
674+ list_for_each(tmp, &dev->queue_head) {
675+ req = list_entry(tmp, struct request, queuelist);
676+ err = __gnbd_send_req(dev, sock, req);
677+
678+ if (err){
679+ printk("failed trying to resend request (%d)\n", err);
680+ break;
681+ }
682+ }
683+
684+ return err;
685+}
686+/*
687+static int get_server_info(struct gnbd_device *dev, struct socket *sock)
688+{
689+ struct sockaddr_in server;
690+ int len;
691+ int err;
692+
693+ err = sock->ops->getname(sock, (struct sockaddr *) &server, &len, 1);
694+ if (err) {
695+ printk(KERN_WARNING "cannot get socket info, shutting down\n");
696+ } else{
697+ dev->server_addr = server.sin_addr;
698+ dev->server_port = server.sin_port;
699+ }
700+ return err;
701+}
702+*/
703+
704+static int gnbd_ctl_ioctl(struct inode *inode, struct file *file,
705+ unsigned int cmd, unsigned long arg)
706+{
707+ struct gnbd_device *dev = NULL;
708+ struct block_device *bdev;
709+ do_it_req_t req;
710+ int error;
711+
712+ if (!capable(CAP_SYS_ADMIN))
713+ return -EPERM;
714+
715+ if (cmd == GNBD_DISCONNECT || cmd == GNBD_CLEAR_QUE ||
716+ cmd == GNBD_PING || cmd == GNBD_PRINT_DEBUG) {
717+ if (arg >= MAX_GNBD)
718+ return -EINVAL;
719+ dev = &gnbd_dev[arg];
720+ BUG_ON(dev->magic != GNBD_MAGIC);
721+ }
722+
723+ /* Anyone capable of this syscall can do *real bad* things */
724+ dprintk(DBG_IOCTL, "%s: gnbd_ioctl cmd=%s(0x%x) arg=%lu\n",
725+ dev->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
726+
727+ switch (cmd) {
728+ case GNBD_DISCONNECT:
729+ printk(KERN_INFO "%s: GNBD_DISCONNECT\n", dev->disk->disk_name);
730+ spin_lock(&dev->open_lock);
731+ if (dev->open_count > 0){
732+ spin_unlock(&dev->open_lock);
733+ return -EBUSY;
734+ }
735+ dev->receiver_pid = -1;
736+ spin_unlock(&dev->open_lock);
737+ /* There is no one using the device, you can disconnect it */
738+ if (dev->sock == NULL)
739+ return -ENOTCONN;
740+ gnbd_send_req(dev, &shutdown_req);
741+ return 0;
742+ case GNBD_CLEAR_QUE:
743+ if (down_interruptible(&dev->do_it_lock))
744+ return -EBUSY;
745+ dev->receiver_pid = -1;
746+ gnbd_clear_que(dev);
747+ bdev = dev->bdev;
748+ if (bdev) {
749+ blk_run_queue(dev->disk->queue);
750+ fsync_bdev(bdev);
751+ invalidate_bdev(bdev, 0);
752+ }
753+ up(&dev->do_it_lock);
754+ return 0;
755+ case GNBD_DO_IT:
756+ if (copy_from_user(&req, (do_it_req_t *)arg, sizeof(req)))
757+ return -EFAULT;
758+ if (req.minor >= 128)
759+ return -EINVAL;
760+ dev = &gnbd_dev[req.minor];
761+ BUG_ON(dev->magic != GNBD_MAGIC);
762+ if (dev->file)
763+ return -EBUSY;
764+ error = -EINVAL;
765+ file = fget(req.sock_fd);
766+ if (!file)
767+ return error;
768+ inode = file->f_dentry->d_inode;
769+ if (!inode->i_sock) {
770+ fput(file);
771+ return error;
772+ }
773+ if (down_trylock(&dev->do_it_lock)){
774+ fput(file);
775+ return -EBUSY;
776+ }
c1c6733f
AM
777+ error = gnbd_resend_requests(dev, SOCKET_I(inode));
778+ if (error){
779+ printk("quitting NBD_DO_IT\n");
780+ up(&dev->do_it_lock);
781+ fput(file);
782+ return error;
783+ }
784+ dev->file = file;
785+ dev->sock = SOCKET_I(inode);
786+ dev->receiver_pid = current->pid;
787+ blk_run_queue(dev->disk->queue);
788+ error = gnbd_do_it(dev);
789+ /* should I kill the socket first */
790+ up(&dev->do_it_lock);
791+ down(&dev->tx_lock);
792+ if (dev->sock) {
793+ printk(KERN_WARNING "%s: shutting down socket\n",
794+ dev->disk->disk_name);
795+ dev->sock->ops->shutdown(dev->sock,
796+ SEND_SHUTDOWN|RCV_SHUTDOWN);
797+ dev->sock = NULL;
798+ }
799+ up(&dev->tx_lock);
800+ file = dev->file;
801+ dev->file = NULL;
802+ if (file)
803+ fput(file);
804+ printk("exitting GNBD_DO_IT ioctl\n");
805+ return error;
806+ case GNBD_PING:
807+ /* FIXME -- should I allow pings if everything is compeletely
808+ * shutdown */
809+ spin_lock(&dev->queue_lock);
810+ /* only one outstanding ping at a time */
811+ if (list_empty(&ping_req.queuelist)){
812+ if (list_empty(&dev->queue_head))
813+ dev->last_received = jiffies;
814+ list_add(&ping_req.queuelist, &dev->queue_head);
815+ }
816+ spin_unlock(&dev->queue_lock);
817+ gnbd_send_req(dev, &ping_req); /* ignore the errors */
818+ return 0;
819+ case GNBD_PRINT_DEBUG:
820+ printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
821+ dev->disk->disk_name,
822+ dev->queue_head.next, dev->queue_head.prev,
823+ &dev->queue_head);
824+ return 0;
825+ case GNBD_GET_TIME:
826+ if (copy_to_user((void *)arg, &insmod_time, sizeof(uint64_t))){
827+ printk(KERN_WARNING "couldn't compy time argument to user\n");
828+ return -EFAULT;
829+ }
830+ return 0;
831+ }
832+ /* FIXME -- should I print something, is EINVAL the right error */
833+ return -EINVAL;
834+}
835+
836+static int gnbd_open(struct inode *inode, struct file *file)
837+{
838+ struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data;
839+ spin_lock(&dev->open_lock);
840+ if (dev->receiver_pid == -1){
841+ spin_unlock(&dev->open_lock);
842+ return -ENXIO;
843+ }
844+ spin_unlock(&dev->open_lock);
845+ if ((file->f_mode & FMODE_WRITE) && (dev->flags & GNBD_READ_ONLY)){
846+ printk(KERN_INFO "cannot open read only gnbd device read/write");
847+ return -EROFS;
848+ }
849+
850+ dev->open_count++;
851+ dev->bdev = inode->i_bdev;
852+ return 0;
853+}
854+
855+/* FIXME -- I don't sync the device at close. This means that If you write
856+ * something, and close the device, and expect that then it is written,
857+ * you are wrong.... This might cause problems */
858+static int gnbd_release(struct inode *inode, struct file *file)
859+{
860+ struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data;
861+
862+ dev->open_count--;
863+ if (dev->open_count == 0)
864+ dev->bdev = NULL;
865+ return 0;
866+}
867+
868+static struct file_operations _gnbd_ctl_fops =
869+{
870+ .ioctl = gnbd_ctl_ioctl,
871+ .owner = THIS_MODULE,
872+};
873+
874+static struct miscdevice _gnbd_misc =
875+{
876+ .minor = MISC_DYNAMIC_MINOR,
877+ .name = "gnbd_ctl",
878+ .devfs_name = "gnbd_ctl",
879+ .fops = &_gnbd_ctl_fops
880+};
881+
882+/* FIXME -- I should probably do more here */
883+int __init gnbd_ctl_init(void)
884+{
885+ int err;
886+
887+ err = misc_register(&_gnbd_misc);
888+ if (err) {
889+ printk("cannot register control device\n");
890+ return err;
891+ }
892+ return 0;
893+}
894+
895+void gnbd_ctl_cleanup(void)
896+{
897+ if (misc_deregister(&_gnbd_misc) < 0)
898+ printk("cannot deregister control device\n");
899+}
900+
901+static struct block_device_operations gnbd_fops =
902+{
903+ .open = gnbd_open,
904+ .release = gnbd_release,
905+ .owner = THIS_MODULE,
906+};
907+
908+/*
909+ * And here should be modules and kernel interface
910+ * (Just smiley confuses emacs :-)
911+ */
912+
913+static int __init gnbd_init(void)
914+{
915+ int err = -ENOMEM;
916+ struct timeval tv;
917+ int i;
918+
919+ if (sizeof(struct gnbd_request) != 28) {
920+ printk(KERN_CRIT "gnbd: sizeof gnbd_request needs to be 28 in order to work!\n" );
921+ return -EIO;
922+ }
923+ shutdown_req.flags = REQ_SPECIAL;
924+ gnbd_cmd(&shutdown_req) = GNBD_CMD_DISC;
925+ shutdown_req.sector = 0;
926+ shutdown_req.nr_sectors = 0;
927+
928+ ping_req.flags = REQ_SPECIAL;
929+ gnbd_cmd(&ping_req) = GNBD_CMD_PING;
930+ ping_req.sector = 0;
931+ ping_req.nr_sectors = 0;
932+
933+ for (i = 0; i < MAX_GNBD; i++) {
934+ struct gendisk *disk = alloc_disk(1);
935+ if (!disk)
936+ goto out;
937+ gnbd_dev[i].disk = disk;
938+ /*
939+ * The new linux 2.5 block layer implementation requires
940+ * every gendisk to have its very own request_queue struct.
941+ * These structs are big so we dynamically allocate them.
942+ */
943+ disk->queue = blk_init_queue(do_gnbd_request, &gnbd_lock);
944+ if (!disk->queue) {
945+ put_disk(disk);
946+ goto out;
947+ }
948+ }
949+ major_nr = register_blkdev(major_nr, "gnbd");
950+ if (major_nr < 0) {
951+ printk("gnbd: unable to get a major number\n");
952+ err = major_nr;
953+ goto out;
954+ }
955+
956+ printk(KERN_INFO "gnbd: registered device at major %d\n", major_nr);
957+ dprintk(DBG_INIT, "gnbd: debugflags=0x%x\n", debugflags);
958+
6461ecac 959+ devfs_mk_dir("gnbd_minor");
c1c6733f
AM
960+ err = class_register(&gnbd_class);
961+ if (err)
962+ goto out_unregister;
963+ for (i = 0; i < MAX_GNBD; i++) {
964+ struct gendisk *disk = gnbd_dev[i].disk;
965+ gnbd_dev[i].file = NULL;
966+ gnbd_dev[i].magic = GNBD_MAGIC;
967+ gnbd_dev[i].flags = 0;
968+ gnbd_dev[i].open_count = 0;
969+ gnbd_dev[i].receiver_pid = -1;
6461ecac 970+ gnbd_dev[i].server_name = NULL;
c1c6733f
AM
971+ gnbd_dev[i].server_port = 0;
972+ gnbd_dev[i].name[0] = '\0';
973+ gnbd_dev[i].bdev = NULL;
974+ spin_lock_init(&gnbd_dev[i].queue_lock);
975+ spin_lock_init(&gnbd_dev[i].open_lock);
976+ INIT_LIST_HEAD(&gnbd_dev[i].queue_head);
977+ init_MUTEX(&gnbd_dev[i].tx_lock);
978+ init_MUTEX(&gnbd_dev[i].do_it_lock);
979+ gnbd_dev[i].class_dev.class = &gnbd_class;
980+ sprintf(gnbd_dev[i].class_dev.class_id, "gnbd%d", i);
981+ err = class_device_register(&gnbd_dev[i].class_dev);
982+ if (err){
983+ printk("class_device_register failed with %d\n", err);
984+ goto out_unregister_class;
985+ }
986+ if(class_device_create_file(&gnbd_dev[i].class_dev,
987+ &class_device_attr_pid))
988+ goto out_remove_file;
989+ if(class_device_create_file(&gnbd_dev[i].class_dev,
990+ &class_device_attr_server))
991+ goto out_remove_file;
992+ if(class_device_create_file(&gnbd_dev[i].class_dev,
993+ &class_device_attr_name))
994+ goto out_remove_file;
995+ if(class_device_create_file(&gnbd_dev[i].class_dev,
996+ &class_device_attr_sectors))
997+ goto out_remove_file;
998+ if(class_device_create_file(&gnbd_dev[i].class_dev,
999+ &class_device_attr_usage))
1000+ goto out_remove_file;
1001+ if(class_device_create_file(&gnbd_dev[i].class_dev,
1002+ &class_device_attr_flags))
1003+ goto out_remove_file;
1004+ if(class_device_create_file(&gnbd_dev[i].class_dev,
1005+ &class_device_attr_waittime))
1006+ goto out_remove_file;
1007+ if(class_device_create_file(&gnbd_dev[i].class_dev,
1008+ &class_device_attr_connected))
1009+ goto out_remove_file;
1010+ disk->major = major_nr;
1011+ disk->first_minor = i;
1012+ disk->fops = &gnbd_fops;
1013+ disk->private_data = &gnbd_dev[i];
1014+ sprintf(disk->disk_name, "gnbd%d", i);
6461ecac 1015+ sprintf(disk->devfs_name, "gnbd_minor/%d", i);
c1c6733f
AM
1016+ set_capacity(disk, 0);
1017+ add_disk(disk);
1018+ if(sysfs_create_link(&gnbd_dev[i].class_dev.kobj,
1019+ &gnbd_dev[i].disk->kobj, "block"))
1020+ goto out_remove_disk;
1021+
1022+ }
1023+
1024+ err = gnbd_ctl_init();
1025+ if (err)
1026+ goto out_unregister_class;
1027+
1028+ insmod_time = (uint64_t) tv.tv_sec * 1000000 + tv.tv_usec;
1029+
1030+ return 0;
1031+out_remove_disk:
1032+ del_gendisk(gnbd_dev[i].disk);
1033+out_remove_file:
1034+ class_device_unregister(&gnbd_dev[i].class_dev);
1035+out_unregister_class:
1036+ while(i--){
1037+ del_gendisk(gnbd_dev[i].disk);
1038+ class_device_unregister(&gnbd_dev[i].class_dev);
1039+ }
1040+ i = MAX_GNBD;
1041+ class_unregister(&gnbd_class);
1042+out_unregister:
1043+ unregister_blkdev(major_nr, "gnbd");
1044+out:
1045+ while (i--) {
1046+ blk_cleanup_queue(gnbd_dev[i].disk->queue);
1047+ put_disk(gnbd_dev[i].disk);
1048+ }
1049+ return err;
1050+}
1051+
1052+static void __exit gnbd_cleanup(void)
1053+{
1054+ int i;
1055+
1056+ gnbd_ctl_cleanup();
1057+ for (i = 0; i < MAX_GNBD; i++) {
1058+ struct gendisk *disk = gnbd_dev[i].disk;
b7b72b66 1059+ class_device_unregister(&gnbd_dev[i].class_dev);
c1c6733f
AM
1060+ if (disk) {
1061+ del_gendisk(disk);
1062+ blk_cleanup_queue(disk->queue);
1063+ put_disk(disk);
1064+ }
6461ecac
AM
1065+ if (gnbd_dev[i].server_name)
1066+ kfree(gnbd_dev[i].server_name);
c1c6733f
AM
1067+ }
1068+ class_unregister(&gnbd_class);
1069+ devfs_remove("gnbd");
1070+ unregister_blkdev(major_nr, "gnbd");
1071+ printk(KERN_INFO "gnbd: unregistered device at major %d\n", major_nr);
1072+}
1073+
1074+module_init(gnbd_init);
1075+module_exit(gnbd_cleanup);
1076+
1077+MODULE_DESCRIPTION("Network Block Device");
1078+MODULE_LICENSE("GPL");
1079+
1080+#ifndef NDEBUG
1081+MODULE_PARM(debugflags, "i");
1082+MODULE_PARM_DESC(debugflags, "flags for controlling debug output");
1083+#endif
c783755a
AM
1084diff -urN linux-2.6.9/include/linux/gnbd.h linux-2.6.9-patched/include/linux/gnbd.h
1085--- linux-2.6.9/include/linux/gnbd.h 1969-12-31 18:00:00.000000000 -0600
1086+++ linux-2.6.9-patched/include/linux/gnbd.h 2004-10-22 13:39:34.000000000 -0500
c1c6733f
AM
1087@@ -0,0 +1,103 @@
1088+/******************************************************************************
1089+*******************************************************************************
1090+**
1091+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1092+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1093+**
1094+** This copyrighted material is made available to anyone wishing to use,
1095+** modify, copy, or redistribute it subject to the terms and conditions
1096+** of the GNU General Public License v.2.
1097+**
1098+*******************************************************************************
1099+******************************************************************************/
1100+
1101+#ifndef LINUX_GNBD_H
1102+#define LINUX_GNBD_H
1103+
1104+#define GNBD_DO_IT _IO( 0xab, 0x20 )
1105+#define GNBD_CLEAR_QUE _IO( 0xab, 0x21 )
1106+#define GNBD_PRINT_DEBUG _IO( 0xab, 0x22 )
1107+#define GNBD_DISCONNECT _IO( 0xab, 0x23 )
1108+#define GNBD_PING _IO( 0xab, 0x24 )
1109+#define GNBD_GET_TIME _IO( 0xab, 0x25 )
1110+
1111+enum {
1112+ GNBD_CMD_READ = 0,
1113+ GNBD_CMD_WRITE = 1,
1114+ GNBD_CMD_DISC = 2,
1115+ GNBD_CMD_PING = 3
1116+};
1117+
1118+#define gnbd_cmd(req) ((req)->cmd[0])
1119+#define MAX_GNBD 128
1120+
1121+/* values for flags field */
1122+#define GNBD_READ_ONLY 0x0001
1123+
1124+/* userspace doesn't need the gnbd_device structure */
1125+#ifdef __KERNEL__
1126+
1127+struct gnbd_device {
1128+ unsigned short int flags;
1129+ struct socket * sock;
1130+ struct file * file; /* If == NULL, device is not ready, yet */
1131+ int magic;
1132+ spinlock_t queue_lock;
1133+ spinlock_t open_lock;
1134+ struct list_head queue_head;/* Requests are added here... */
1135+ struct semaphore tx_lock;
1136+ struct gendisk *disk;
1137+ pid_t receiver_pid;
1138+ struct semaphore do_it_lock;
1139+ int open_count;
1140+ struct class_device class_dev;
1141+ unsigned short int server_port;
6461ecac 1142+ char *server_name;
c1c6733f
AM
1143+ char name[32];
1144+ unsigned long last_received;
1145+ struct block_device *bdev;
1146+};
1147+
1148+#endif /* __KERNEL__ */
1149+
1150+/* These are sent over the network in the request/reply magic fields */
1151+
1152+#define GNBD_REQUEST_MAGIC 0x37a07e00
1153+#define GNBD_REPLY_MAGIC 0x41f09370
1154+#define GNBD_KEEP_ALIVE_MAGIC 0x5B46D8C2
1155+/* Do *not* use magics: 0x12560953 0x96744668. */
1156+
1157+/*
1158+ * This is the packet used for communication between client and
1159+ * server. All data are in network byte order.
1160+ */
1161+struct gnbd_request {
1162+ uint32_t magic;
1163+ uint32_t type; /* == READ || == WRITE why so long */
1164+ char handle[8]; /* why is this a char array instead of a u64 */
1165+ uint64_t from;
1166+ uint32_t len;
1167+}
1168+#ifdef __GNUC__
1169+ __attribute__ ((packed))
1170+#endif /* __GNUC__ */
1171+;
1172+
1173+/*
1174+ * This is the reply packet that gnbd-server sends back to the client after
1175+ * it has completed an I/O request (or an error occurs).
1176+ */
1177+#define SIZE_OF_REPLY 16
1178+struct gnbd_reply {
1179+ uint32_t magic;
1180+ uint32_t error; /* 0 = ok, else error */
1181+ char handle[8]; /* handle you got from request */
1182+};
1183+
1184+struct do_it_req_s {
1185+ unsigned int minor;
1186+ int sock_fd;
1187+};
1188+typedef struct do_it_req_s do_it_req_t;
1189+
1190+#endif /* LINUX_GNBD_H */
This page took 0.204279 seconds and 4 git commands to generate.