diff -urN linux-2.6.9/drivers/block/Kconfig linux-2.6.9-patched/drivers/block/Kconfig --- linux-2.6.9/drivers/block/Kconfig 2004-10-18 16:53:43.000000000 -0500 +++ linux-2.6.9-patched/drivers/block/Kconfig 2004-10-22 13:27:52.938836304 -0500 @@ -356,6 +356,13 @@ your machine, or if you want to have a raid or loopback device bigger than 2TB. Otherwise say N. +config BLK_DEV_GNBD + tristate "Global network block device support" + depends on NET + ---help--- + + If unsure, say N. + source "drivers/s390/block/Kconfig" endmenu diff -urN linux-2.6.9/drivers/block/Makefile linux-2.6.9-patched/drivers/block/Makefile --- linux-2.6.9/drivers/block/Makefile 2004-10-18 16:54:55.000000000 -0500 +++ linux-2.6.9-patched/drivers/block/Makefile 2004-10-22 13:30:48.224188880 -0500 @@ -43,4 +43,4 @@ obj-$(CONFIG_VIODASD) += viodasd.o obj-$(CONFIG_BLK_DEV_SX8) += sx8.o obj-$(CONFIG_BLK_DEV_UB) += ub.o - +obj-$(CONFIG_BLK_DEV_GNBD) += gnbd.o diff -urN linux-2.6.9/drivers/block/gnbd.c linux-2.6.9-patched/drivers/block/gnbd.c --- linux-2.6.9/drivers/block/gnbd.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-patched/drivers/block/gnbd.c 2004-10-22 13:43:33.303879088 -0500 @@ -0,0 +1,1053 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +/* Large chunks of this code were lifted from nbd.c */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include + +static int major_nr = 0; +uint64_t insmod_time; + + +#define GNBD_MAGIC 0x74d06100 + +#ifdef NDEBUG +#define dprintk(flags, fmt...) +#else /* NDEBUG */ +#define dprintk(flags, fmt...) do { \ + if (debugflags & (flags)) printk(KERN_DEBUG fmt); \ +} while (0) +#define DBG_IOCTL 0x0004 +#define DBG_INIT 0x0010 +#define DBG_EXIT 0x0020 +#define DBG_BLKDEV 0x0100 +#define DBG_RX 0x0200 +#define DBG_TX 0x0400 +static unsigned int debugflags; +#endif /* NDEBUG */ + +static struct gnbd_device gnbd_dev[MAX_GNBD]; + +struct request shutdown_req; +struct request ping_req; + +static spinlock_t gnbd_lock = SPIN_LOCK_UNLOCKED; + +#define to_gnbd_dev(d) container_of(d, struct gnbd_device, class_dev) + +static void gnbd_class_release(struct class_device *class_dev) +{ + printk("releasing gnbd class\n"); + /* FIXME -- What the hell do I have to free up here */ +} + +static struct class gnbd_class = { + .name = "gnbd", + .release = gnbd_class_release +}; + + +static ssize_t show_pid(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "%d\n", dev->receiver_pid); +} + +static CLASS_DEVICE_ATTR(pid, S_IRUGO, show_pid, NULL); + +static ssize_t show_server(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + if (dev->server_name) + return sprintf(buf, "%s/%hx\n", dev->server_name, + dev->server_port); + else + return sprintf(buf, "\n"); +} + +/* FIXME -- should a empty store free the memory */ +static ssize_t store_server(struct class_device *class_dev, + const char *buf, size_t count) +{ + int res; + short unsigned int port; + char *ptr; + struct gnbd_device *dev = to_gnbd_dev(class_dev); + if (down_trylock(&dev->do_it_lock)) + return -EBUSY; + if (dev->server_name) + kfree(dev->server_name); + dev->server_name = kmalloc(count + 1, GFP_KERNEL); + if (!dev->server_name) + return -ENOMEM; + memcpy(dev->server_name, buf, count); + dev->server_name[count] = 0; + ptr = strchr(dev->server_name, '/'); + if (!ptr) + return -EINVAL; + *ptr++ = 0; + res = sscanf(ptr, "%4hx", &port); + if (res != 1){ + up(&dev->do_it_lock); + return -EINVAL; + } + dev->server_port = port; + up(&dev->do_it_lock); + return count; +} + +CLASS_DEVICE_ATTR(server, S_IRUGO | S_IWUSR, show_server, store_server); + +static ssize_t show_name(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "%s\n", dev->name); +} + +static ssize_t store_name(struct class_device *class_dev, + const char *buf, size_t count) +{ + int res; + struct gnbd_device *dev = to_gnbd_dev(class_dev); + if (down_trylock(&dev->do_it_lock)) + return -EBUSY; + res = sscanf(buf, "%31s", dev->name); + up(&dev->do_it_lock); + if (res != 1) + return -EINVAL; + return count; +} + +CLASS_DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show_name, store_name); + + +static ssize_t show_sectors(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "%Lu\n", + (unsigned long long)get_capacity(dev->disk)); +} + +static ssize_t store_sectors(struct class_device *class_dev, + const char *buf, size_t count) +{ + int res; + sector_t size; + struct block_device *bdev; + struct gnbd_device *dev = to_gnbd_dev(class_dev); + + if (down_trylock(&dev->do_it_lock)) + return -EBUSY; + res = sscanf(buf, "%Lu\n", &size); + if (res != 1){ + up(&dev->do_it_lock); + return -EINVAL; + } + /* FIXME -- should I switch the order here, so that I don't have + capacity set to one thing and the bdev inode size set to another */ + set_capacity(dev->disk, size); + bdev = bdget_disk(dev->disk, 0); + if (bdev) { + down(&bdev->bd_inode->i_sem); + i_size_write(bdev->bd_inode, (loff_t)size << 9); + up(&bdev->bd_inode->i_sem); + bdput(bdev); + } + up(&dev->do_it_lock); + return count; +} + +CLASS_DEVICE_ATTR(sectors, S_IRUGO | S_IWUSR, show_sectors, store_sectors); + +static ssize_t show_usage(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "%d\n", dev->open_count); +} + +CLASS_DEVICE_ATTR(usage, S_IRUGO, show_usage, NULL); + +static ssize_t show_flags(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "0x%04x\n", dev->flags); +} + +static ssize_t store_flags(struct class_device *class_dev, + const char *buf, size_t count) +{ + int res; + + struct gnbd_device *dev = to_gnbd_dev(class_dev); + if (down_trylock(&dev->do_it_lock)) + return -EBUSY; + res = sscanf(buf, "0x%hx", &dev->flags); + up(&dev->do_it_lock); + if (res != 1) + return -EINVAL; + return count; +} + + +CLASS_DEVICE_ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags); + +static ssize_t show_waittime(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + if (list_empty(&dev->queue_head)) + return sprintf(buf, "-1\n"); + return sprintf(buf, "%ld\n", + ((long)jiffies - (long)dev->last_received) / HZ); +} + +CLASS_DEVICE_ATTR(waittime, S_IRUGO, show_waittime, NULL); + +static ssize_t show_connected(struct class_device *class_dev, char *buf) +{ + struct gnbd_device *dev = to_gnbd_dev(class_dev); + return sprintf(buf, "%d\n", (dev->sock != NULL)); +} + +CLASS_DEVICE_ATTR(connected, S_IRUGO, show_connected, NULL); + +#ifndef NDEBUG +static const char *ioctl_cmd_to_ascii(int cmd) +{ + switch (cmd) { + case GNBD_DO_IT: return "do-it"; + case GNBD_CLEAR_QUE: return "clear-que"; + case GNBD_PRINT_DEBUG: return "print-debug"; + case GNBD_DISCONNECT: return "disconnect"; + } + return "unknown"; +} + +static const char *gnbdcmd_to_ascii(int cmd) +{ + switch (cmd) { + case GNBD_CMD_READ: return "read"; + case GNBD_CMD_WRITE: return "write"; + case GNBD_CMD_DISC: return "disconnect"; + case GNBD_CMD_PING: return "ping"; + } + return "invalid"; +} +#endif /* NDEBUG */ + +static void gnbd_end_request(struct request *req) +{ + int uptodate = (req->errors == 0) ? 1 : 0; + request_queue_t *q = req->q; + unsigned long flags; + + dprintk(DBG_BLKDEV, "%s: request %p: %s\n", req->rq_disk->disk_name, + req, uptodate? "done": "failed"); + + if (!uptodate) + printk("%s %d called gnbd_end_request with and error\n", + current->comm, current->pid); + + spin_lock_irqsave(q->queue_lock, flags); + if (!end_that_request_first(req, uptodate, req->nr_sectors)) { + end_that_request_last(req); + } + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/* + * Send or receive packet. + */ +static int sock_xmit(struct socket *sock, int send, void *buf, int size, + int msg_flags) +{ + mm_segment_t oldfs; + int result; + struct msghdr msg; + struct iovec iov; + unsigned long flags; + sigset_t oldset; + + oldfs = get_fs(); + set_fs(get_ds()); + /* Allow interception of SIGKILL only + * Don't allow other signals to interrupt the transmission */ + spin_lock_irqsave(¤t->sighand->siglock, flags); + oldset = current->blocked; + sigfillset(¤t->blocked); + sigdelsetmask(¤t->blocked, sigmask(SIGKILL) | sigmask(SIGTERM) | + sigmask(SIGHUP)); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + + do { + sock->sk->sk_allocation = GFP_NOIO; + iov.iov_base = buf; + iov.iov_len = size; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + msg.msg_flags = msg_flags | MSG_NOSIGNAL; + + if (send) + result = sock_sendmsg(sock, &msg, size); + else + result = sock_recvmsg(sock, &msg, size, 0); + + if (signal_pending(current)) { + siginfo_t info; + spin_lock_irqsave(¤t->sighand->siglock, flags); + printk(KERN_WARNING "gnbd (pid %d: %s) got signal %d\n", + current->pid, current->comm, + dequeue_signal(current, ¤t->blocked, &info)); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + result = -EINTR; + break; + } + + if (result <= 0) { + if (result == 0) + result = -EPIPE; /* short read */ + break; + } + size -= result; + buf += result; + } while (size > 0); + + spin_lock_irqsave(¤t->sighand->siglock, flags); + current->blocked = oldset; + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + + set_fs(oldfs); + return result; +} + +static inline int sock_send_bvec(struct socket *sock, struct bio_vec *bvec, + int flags) +{ + int result; + void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(sock, 1, kaddr + bvec->bv_offset, bvec->bv_len, + flags); + kunmap(bvec->bv_page); + return result; +} + + +#define gnbd_send_req(dev, req) __gnbd_send_req((dev), (dev)->sock, (req)) + +int __gnbd_send_req(struct gnbd_device *dev, struct socket *sock, + struct request *req) +{ + int result, i, flags; + struct gnbd_request request; + unsigned long size = req->nr_sectors << 9; + + request.magic = htonl(GNBD_REQUEST_MAGIC); + request.type = htonl(gnbd_cmd(req)); + request.from = cpu_to_be64((u64) req->sector << 9); + request.len = htonl(size); + memcpy(request.handle, &req, sizeof(req)); + + down(&dev->tx_lock); + + if (!sock) { + printk(KERN_ERR "%s: Attempted send on closed socket\n", + dev->disk->disk_name); + result = -ENOTCONN; + goto error_out; + } + + dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", + dev->disk->disk_name, req, + gnbdcmd_to_ascii(gnbd_cmd(req)), + (unsigned long long)req->sector << 9, + req->nr_sectors << 9); + result = sock_xmit(sock, 1, &request, sizeof(request), + (gnbd_cmd(req) == GNBD_CMD_WRITE)? MSG_MORE: 0); + if (result < 0) { + printk(KERN_ERR "%s: Send control failed (result %d)\n", + dev->disk->disk_name, result); + goto error_out; + } + + if (gnbd_cmd(req) == GNBD_CMD_WRITE) { + struct bio *bio; + /* + * we are really probing at internals to determine + * whether to set MSG_MORE or not... + */ + rq_for_each_bio(bio, req) { + struct bio_vec *bvec; + bio_for_each_segment(bvec, bio, i) { + flags = 0; + if ((i < (bio->bi_vcnt - 1)) || bio->bi_next) + flags = MSG_MORE; + dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n", + dev->disk->disk_name, req, + bvec->bv_len); + result = sock_send_bvec(sock, bvec, flags); + if (result < 0) { + printk(KERN_ERR "%s: Send data failed (result %d)\n", + dev->disk->disk_name, + result); + goto error_out; + } + } + } + } + up(&dev->tx_lock); + return 0; + +error_out: + up(&dev->tx_lock); + return result; +} + + +static int gnbd_find_request(struct gnbd_device *dev, struct request *xreq) +{ + struct request *req; + struct list_head *tmp; + + list_for_each(tmp, &dev->queue_head) { + req = list_entry(tmp, struct request, queuelist); + if (req != xreq) + continue; + return 1; + } + return 0; +} + +static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) +{ + int result; + void *kaddr = kmap(bvec->bv_page); + result = sock_xmit(sock, 0, kaddr + bvec->bv_offset, bvec->bv_len, + MSG_WAITALL); + kunmap(bvec->bv_page); + return result; +} + +int gnbd_recv_req(struct gnbd_device *dev, struct request *req) +{ + int result; + int i; + struct bio *bio; + rq_for_each_bio(bio, req) { + struct bio_vec *bvec; + bio_for_each_segment(bvec, bio, i) { + result = sock_recv_bvec(dev->sock, bvec); + if (result < 0) { + printk(KERN_ERR "%s: Receive data failed (result %d)\n", + dev->disk->disk_name, + result); + return result; + } + dprintk(DBG_RX, "%s: request %p: got %d bytes data\n", + dev->disk->disk_name, req, bvec->bv_len); + } + } + return 0; +} + +int gnbd_do_it(struct gnbd_device *dev) +{ + int result; + struct gnbd_reply reply; + struct request *req; + struct socket *sock = dev->sock; + + BUG_ON(dev->magic != GNBD_MAGIC); + + while((result = sock_xmit(sock, 0, &reply,sizeof(reply), MSG_WAITALL)) > 0){ + if (ntohl(reply.magic) == GNBD_KEEP_ALIVE_MAGIC) + /* FIXME -- I should reset the wait time here */ + continue; + + memcpy(&req, reply.handle, sizeof(req)); + if (req == &shutdown_req) + return 0; + + if (!gnbd_find_request(dev, req)){ + printk(KERN_ERR "%s: Unexpected reply (%p)\n", + dev->disk->disk_name, reply.handle); + return -EBADR; + } + if (ntohl(reply.magic) != GNBD_REPLY_MAGIC) { + printk(KERN_ERR "%s: Wrong magic (0x%lx)\n", + dev->disk->disk_name, + (unsigned long)ntohl(reply.magic)); + return -EPROTO; + } + if (ntohl(reply.error)) { + printk(KERN_ERR "%s: Other side returned error (%d)\n", + dev->disk->disk_name, ntohl(reply.error)); + req->errors++; + goto remove_req; + } + dprintk(DBG_RX, "%s: request %p: got reply\n", + dev->disk->disk_name, req); + + if (gnbd_cmd(req) == GNBD_CMD_READ){ + result = gnbd_recv_req(dev, req); + if (result < 0) + return result; + } +remove_req: + spin_lock(&dev->queue_lock); + list_del_init(&req->queuelist); + dev->last_received = jiffies; + spin_unlock(&dev->queue_lock); + if (req != &ping_req) + gnbd_end_request(req); + } + printk(KERN_ERR "%s: Receive control failed (result %d)\n", + dev->disk->disk_name, result); + return result; +} + +void gnbd_clear_que(struct gnbd_device *dev) +{ + struct request *req; + + BUG_ON(dev->magic != GNBD_MAGIC); + + do { + req = NULL; + if (!list_empty(&dev->queue_head)) { + req = list_entry(dev->queue_head.next, struct request, queuelist); + list_del_init(&req->queuelist); + } + if (req && req != &ping_req) { + req->errors++; + gnbd_end_request(req); + } + } while (req); +} + +/* + * We always wait for result of write, for now. It would be nice to make it optional + * in future + * if ((req->cmd == WRITE) && (dev->flags & GNBD_WRITE_NOCHK)) + * { printk( "Warning: Ignoring result!\n"); gnbd_end_request( req ); } + */ + +static void do_gnbd_request(request_queue_t * q) +{ + int err; + struct request *req; + + while ((req = elv_next_request(q)) != NULL) { + struct gnbd_device *dev; + + blkdev_dequeue_request(req); + dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n", + req->rq_disk->disk_name, req, req->flags); + + if (!(req->flags & REQ_CMD)) + goto error_out; + + dev = req->rq_disk->private_data; + + if (dev->receiver_pid == -1) + goto error_out; + + BUG_ON(dev->magic != GNBD_MAGIC); + + gnbd_cmd(req) = GNBD_CMD_READ; + if (rq_data_dir(req) == WRITE) { + gnbd_cmd(req) = GNBD_CMD_WRITE; + if (dev->flags & GNBD_READ_ONLY) { + printk(KERN_ERR "%s: Write on read-only\n", + dev->disk->disk_name); + goto error_out; + } + } + + req->errors = 0; + spin_unlock_irq(q->queue_lock); + + spin_lock(&dev->queue_lock); + + if (list_empty(&dev->queue_head)) + dev->last_received = jiffies; + list_add(&req->queuelist, &dev->queue_head); + spin_unlock(&dev->queue_lock); + + err = gnbd_send_req(dev, req); + + spin_lock_irq(q->queue_lock); + if (err) + goto sock_error; + continue; + +error_out: + req->errors++; + spin_unlock(q->queue_lock); + gnbd_end_request(req); + spin_lock(q->queue_lock); + } + return; + +sock_error: + return; +} + +/* + * This is called before dev-sock is set, so you dodn't need + * to worry about the tx_lock or the queue_lock + */ +static int gnbd_resend_requests(struct gnbd_device *dev, struct socket *sock) +{ + int err = 0; + struct request *req; + struct list_head *tmp; + + printk("resending requests\n"); + list_for_each(tmp, &dev->queue_head) { + req = list_entry(tmp, struct request, queuelist); + err = __gnbd_send_req(dev, sock, req); + + if (err){ + printk("failed trying to resend request (%d)\n", err); + break; + } + } + + return err; +} +/* +static int get_server_info(struct gnbd_device *dev, struct socket *sock) +{ + struct sockaddr_in server; + int len; + int err; + + err = sock->ops->getname(sock, (struct sockaddr *) &server, &len, 1); + if (err) { + printk(KERN_WARNING "cannot get socket info, shutting down\n"); + } else{ + dev->server_addr = server.sin_addr; + dev->server_port = server.sin_port; + } + return err; +} +*/ + +static int gnbd_ctl_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct gnbd_device *dev = NULL; + struct block_device *bdev; + do_it_req_t req; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (cmd == GNBD_DISCONNECT || cmd == GNBD_CLEAR_QUE || + cmd == GNBD_PING || cmd == GNBD_PRINT_DEBUG) { + if (arg >= MAX_GNBD) + return -EINVAL; + dev = &gnbd_dev[arg]; + BUG_ON(dev->magic != GNBD_MAGIC); + } + + /* Anyone capable of this syscall can do *real bad* things */ + dprintk(DBG_IOCTL, "%s: gnbd_ioctl cmd=%s(0x%x) arg=%lu\n", + dev->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg); + + switch (cmd) { + case GNBD_DISCONNECT: + printk(KERN_INFO "%s: GNBD_DISCONNECT\n", dev->disk->disk_name); + spin_lock(&dev->open_lock); + if (dev->open_count > 0){ + spin_unlock(&dev->open_lock); + return -EBUSY; + } + dev->receiver_pid = -1; + spin_unlock(&dev->open_lock); + /* There is no one using the device, you can disconnect it */ + if (dev->sock == NULL) + return -ENOTCONN; + gnbd_send_req(dev, &shutdown_req); + return 0; + case GNBD_CLEAR_QUE: + if (down_interruptible(&dev->do_it_lock)) + return -EBUSY; + dev->receiver_pid = -1; + gnbd_clear_que(dev); + bdev = dev->bdev; + if (bdev) { + blk_run_queue(dev->disk->queue); + fsync_bdev(bdev); + invalidate_bdev(bdev, 0); + } + up(&dev->do_it_lock); + return 0; + case GNBD_DO_IT: + if (copy_from_user(&req, (do_it_req_t *)arg, sizeof(req))) + return -EFAULT; + if (req.minor >= 128) + return -EINVAL; + dev = &gnbd_dev[req.minor]; + BUG_ON(dev->magic != GNBD_MAGIC); + if (dev->file) + return -EBUSY; + error = -EINVAL; + file = fget(req.sock_fd); + if (!file) + return error; + inode = file->f_dentry->d_inode; + if (!inode->i_sock) { + fput(file); + return error; + } + if (down_trylock(&dev->do_it_lock)){ + fput(file); + return -EBUSY; + } + error = gnbd_resend_requests(dev, SOCKET_I(inode)); + if (error){ + printk("quitting NBD_DO_IT\n"); + up(&dev->do_it_lock); + fput(file); + return error; + } + dev->file = file; + dev->sock = SOCKET_I(inode); + dev->receiver_pid = current->pid; + blk_run_queue(dev->disk->queue); + error = gnbd_do_it(dev); + /* should I kill the socket first */ + up(&dev->do_it_lock); + down(&dev->tx_lock); + if (dev->sock) { + printk(KERN_WARNING "%s: shutting down socket\n", + dev->disk->disk_name); + dev->sock->ops->shutdown(dev->sock, + SEND_SHUTDOWN|RCV_SHUTDOWN); + dev->sock = NULL; + } + up(&dev->tx_lock); + file = dev->file; + dev->file = NULL; + if (file) + fput(file); + printk("exitting GNBD_DO_IT ioctl\n"); + return error; + case GNBD_PING: + /* FIXME -- should I allow pings if everything is compeletely + * shutdown */ + spin_lock(&dev->queue_lock); + /* only one outstanding ping at a time */ + if (list_empty(&ping_req.queuelist)){ + if (list_empty(&dev->queue_head)) + dev->last_received = jiffies; + list_add(&ping_req.queuelist, &dev->queue_head); + } + spin_unlock(&dev->queue_lock); + gnbd_send_req(dev, &ping_req); /* ignore the errors */ + return 0; + case GNBD_PRINT_DEBUG: + printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", + dev->disk->disk_name, + dev->queue_head.next, dev->queue_head.prev, + &dev->queue_head); + return 0; + case GNBD_GET_TIME: + if (copy_to_user((void *)arg, &insmod_time, sizeof(uint64_t))){ + printk(KERN_WARNING "couldn't compy time argument to user\n"); + return -EFAULT; + } + return 0; + } + /* FIXME -- should I print something, is EINVAL the right error */ + return -EINVAL; +} + +static int gnbd_open(struct inode *inode, struct file *file) +{ + struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data; + spin_lock(&dev->open_lock); + if (dev->receiver_pid == -1){ + spin_unlock(&dev->open_lock); + return -ENXIO; + } + spin_unlock(&dev->open_lock); + if ((file->f_mode & FMODE_WRITE) && (dev->flags & GNBD_READ_ONLY)){ + printk(KERN_INFO "cannot open read only gnbd device read/write"); + return -EROFS; + } + + dev->open_count++; + dev->bdev = inode->i_bdev; + return 0; +} + +/* FIXME -- I don't sync the device at close. This means that If you write + * something, and close the device, and expect that then it is written, + * you are wrong.... This might cause problems */ +static int gnbd_release(struct inode *inode, struct file *file) +{ + struct gnbd_device *dev = inode->i_bdev->bd_disk->private_data; + + dev->open_count--; + if (dev->open_count == 0) + dev->bdev = NULL; + return 0; +} + +static struct file_operations _gnbd_ctl_fops = +{ + .ioctl = gnbd_ctl_ioctl, + .owner = THIS_MODULE, +}; + +static struct miscdevice _gnbd_misc = +{ + .minor = MISC_DYNAMIC_MINOR, + .name = "gnbd_ctl", + .devfs_name = "gnbd_ctl", + .fops = &_gnbd_ctl_fops +}; + +/* FIXME -- I should probably do more here */ +int __init gnbd_ctl_init(void) +{ + int err; + + err = misc_register(&_gnbd_misc); + if (err) { + printk("cannot register control device\n"); + return err; + } + return 0; +} + +void gnbd_ctl_cleanup(void) +{ + if (misc_deregister(&_gnbd_misc) < 0) + printk("cannot deregister control device\n"); +} + +static struct block_device_operations gnbd_fops = +{ + .open = gnbd_open, + .release = gnbd_release, + .owner = THIS_MODULE, +}; + +/* + * And here should be modules and kernel interface + * (Just smiley confuses emacs :-) + */ + +static int __init gnbd_init(void) +{ + int err = -ENOMEM; + struct timeval tv; + int i; + + if (sizeof(struct gnbd_request) != 28) { + printk(KERN_CRIT "gnbd: sizeof gnbd_request needs to be 28 in order to work!\n" ); + return -EIO; + } + shutdown_req.flags = REQ_SPECIAL; + gnbd_cmd(&shutdown_req) = GNBD_CMD_DISC; + shutdown_req.sector = 0; + shutdown_req.nr_sectors = 0; + + ping_req.flags = REQ_SPECIAL; + gnbd_cmd(&ping_req) = GNBD_CMD_PING; + ping_req.sector = 0; + ping_req.nr_sectors = 0; + + for (i = 0; i < MAX_GNBD; i++) { + struct gendisk *disk = alloc_disk(1); + if (!disk) + goto out; + gnbd_dev[i].disk = disk; + /* + * The new linux 2.5 block layer implementation requires + * every gendisk to have its very own request_queue struct. + * These structs are big so we dynamically allocate them. + */ + disk->queue = blk_init_queue(do_gnbd_request, &gnbd_lock); + if (!disk->queue) { + put_disk(disk); + goto out; + } + } + major_nr = register_blkdev(major_nr, "gnbd"); + if (major_nr < 0) { + printk("gnbd: unable to get a major number\n"); + err = major_nr; + goto out; + } + + printk(KERN_INFO "gnbd: registered device at major %d\n", major_nr); + dprintk(DBG_INIT, "gnbd: debugflags=0x%x\n", debugflags); + + devfs_mk_dir("gnbd_minor"); + err = class_register(&gnbd_class); + if (err) + goto out_unregister; + for (i = 0; i < MAX_GNBD; i++) { + struct gendisk *disk = gnbd_dev[i].disk; + gnbd_dev[i].file = NULL; + gnbd_dev[i].magic = GNBD_MAGIC; + gnbd_dev[i].flags = 0; + gnbd_dev[i].open_count = 0; + gnbd_dev[i].receiver_pid = -1; + gnbd_dev[i].server_name = NULL; + gnbd_dev[i].server_port = 0; + gnbd_dev[i].name[0] = '\0'; + gnbd_dev[i].bdev = NULL; + spin_lock_init(&gnbd_dev[i].queue_lock); + spin_lock_init(&gnbd_dev[i].open_lock); + INIT_LIST_HEAD(&gnbd_dev[i].queue_head); + init_MUTEX(&gnbd_dev[i].tx_lock); + init_MUTEX(&gnbd_dev[i].do_it_lock); + gnbd_dev[i].class_dev.class = &gnbd_class; + sprintf(gnbd_dev[i].class_dev.class_id, "gnbd%d", i); + err = class_device_register(&gnbd_dev[i].class_dev); + if (err){ + printk("class_device_register failed with %d\n", err); + goto out_unregister_class; + } + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_pid)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_server)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_name)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_sectors)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_usage)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_flags)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_waittime)) + goto out_remove_file; + if(class_device_create_file(&gnbd_dev[i].class_dev, + &class_device_attr_connected)) + goto out_remove_file; + disk->major = major_nr; + disk->first_minor = i; + disk->fops = &gnbd_fops; + disk->private_data = &gnbd_dev[i]; + sprintf(disk->disk_name, "gnbd%d", i); + sprintf(disk->devfs_name, "gnbd_minor/%d", i); + set_capacity(disk, 0); + add_disk(disk); + if(sysfs_create_link(&gnbd_dev[i].class_dev.kobj, + &gnbd_dev[i].disk->kobj, "block")) + goto out_remove_disk; + + } + + err = gnbd_ctl_init(); + if (err) + goto out_unregister_class; + + insmod_time = (uint64_t) tv.tv_sec * 1000000 + tv.tv_usec; + + return 0; +out_remove_disk: + del_gendisk(gnbd_dev[i].disk); +out_remove_file: + class_device_unregister(&gnbd_dev[i].class_dev); +out_unregister_class: + while(i--){ + del_gendisk(gnbd_dev[i].disk); + class_device_unregister(&gnbd_dev[i].class_dev); + } + i = MAX_GNBD; + class_unregister(&gnbd_class); +out_unregister: + unregister_blkdev(major_nr, "gnbd"); +out: + while (i--) { + blk_cleanup_queue(gnbd_dev[i].disk->queue); + put_disk(gnbd_dev[i].disk); + } + return err; +} + +static void __exit gnbd_cleanup(void) +{ + int i; + + gnbd_ctl_cleanup(); + for (i = 0; i < MAX_GNBD; i++) { + struct gendisk *disk = gnbd_dev[i].disk; + class_device_unregister(&gnbd_dev[i].class_dev); + if (disk) { + del_gendisk(disk); + blk_cleanup_queue(disk->queue); + put_disk(disk); + } + if (gnbd_dev[i].server_name) + kfree(gnbd_dev[i].server_name); + } + class_unregister(&gnbd_class); + devfs_remove("gnbd"); + unregister_blkdev(major_nr, "gnbd"); + printk(KERN_INFO "gnbd: unregistered device at major %d\n", major_nr); +} + +module_init(gnbd_init); +module_exit(gnbd_cleanup); + +MODULE_DESCRIPTION("Network Block Device"); +MODULE_LICENSE("GPL"); + +#ifndef NDEBUG +MODULE_PARM(debugflags, "i"); +MODULE_PARM_DESC(debugflags, "flags for controlling debug output"); +#endif diff -urN linux-2.6.9/include/linux/gnbd.h linux-2.6.9-patched/include/linux/gnbd.h --- linux-2.6.9/include/linux/gnbd.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-2.6.9-patched/include/linux/gnbd.h 2004-10-22 13:39:34.000000000 -0500 @@ -0,0 +1,103 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef LINUX_GNBD_H +#define LINUX_GNBD_H + +#define GNBD_DO_IT _IO( 0xab, 0x20 ) +#define GNBD_CLEAR_QUE _IO( 0xab, 0x21 ) +#define GNBD_PRINT_DEBUG _IO( 0xab, 0x22 ) +#define GNBD_DISCONNECT _IO( 0xab, 0x23 ) +#define GNBD_PING _IO( 0xab, 0x24 ) +#define GNBD_GET_TIME _IO( 0xab, 0x25 ) + +enum { + GNBD_CMD_READ = 0, + GNBD_CMD_WRITE = 1, + GNBD_CMD_DISC = 2, + GNBD_CMD_PING = 3 +}; + +#define gnbd_cmd(req) ((req)->cmd[0]) +#define MAX_GNBD 128 + +/* values for flags field */ +#define GNBD_READ_ONLY 0x0001 + +/* userspace doesn't need the gnbd_device structure */ +#ifdef __KERNEL__ + +struct gnbd_device { + unsigned short int flags; + struct socket * sock; + struct file * file; /* If == NULL, device is not ready, yet */ + int magic; + spinlock_t queue_lock; + spinlock_t open_lock; + struct list_head queue_head;/* Requests are added here... */ + struct semaphore tx_lock; + struct gendisk *disk; + pid_t receiver_pid; + struct semaphore do_it_lock; + int open_count; + struct class_device class_dev; + unsigned short int server_port; + char *server_name; + char name[32]; + unsigned long last_received; + struct block_device *bdev; +}; + +#endif /* __KERNEL__ */ + +/* These are sent over the network in the request/reply magic fields */ + +#define GNBD_REQUEST_MAGIC 0x37a07e00 +#define GNBD_REPLY_MAGIC 0x41f09370 +#define GNBD_KEEP_ALIVE_MAGIC 0x5B46D8C2 +/* Do *not* use magics: 0x12560953 0x96744668. */ + +/* + * This is the packet used for communication between client and + * server. All data are in network byte order. + */ +struct gnbd_request { + uint32_t magic; + uint32_t type; /* == READ || == WRITE why so long */ + char handle[8]; /* why is this a char array instead of a u64 */ + uint64_t from; + uint32_t len; +} +#ifdef __GNUC__ + __attribute__ ((packed)) +#endif /* __GNUC__ */ +; + +/* + * This is the reply packet that gnbd-server sends back to the client after + * it has completed an I/O request (or an error occurs). + */ +#define SIZE_OF_REPLY 16 +struct gnbd_reply { + uint32_t magic; + uint32_t error; /* 0 = ok, else error */ + char handle[8]; /* handle you got from request */ +}; + +struct do_it_req_s { + unsigned int minor; + int sock_fd; +}; +typedef struct do_it_req_s do_it_req_t; + +#endif /* LINUX_GNBD_H */