1 --- a/tools/blktap/drivers/block-aio.c 2006-09-21 13:45:44.000000000 +0100
2 +++ b/tools/blktap/drivers/block-aio.c 2006-09-21 19:58:18.000000000 +0100
8 #include <sys/statvfs.h>
10 #include <sys/ioctl.h>
16 - * We used a kernel patch to return an fd associated with the AIO context
17 - * so that we can concurrently poll on synchronous and async descriptors.
18 - * This is signalled by passing 1 as the io context to io_setup.
20 -#define REQUEST_ASYNC_FD 1
22 #define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ)
27 struct iocb *iocb_queue[MAX_AIO_REQS];
29 - int poll_fd; /* NB: we require aio_poll support */
30 struct io_event aio_events[MAX_AIO_REQS];
32 + pthread_t aio_thread;
33 + /* pipe fds for communication with the aio completion thread */
35 + int completion_fd[2];
38 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
40 +static void *tdaio_completion_thread(void *);
42 /*Get Image size, secsize*/
43 static int get_image_info(struct td_state *s, int fd)
47 unsigned long total_size;
48 - struct statvfs statBuf;
51 ret = fstat(fd, &stat);
53 /*Get the sector size*/
54 #if defined(BLKSSZGET)
57 s->sector_size = DEFAULT_SECTOR_SIZE;
58 ioctl(fd, BLKSSZGET, &s->sector_size);
61 prv->iocb_free_count = MAX_AIO_REQS;
64 - prv->aio_ctx = (io_context_t) REQUEST_ASYNC_FD;
65 - prv->poll_fd = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
66 + prv->aio_ctx = (io_context_t) 0;
67 + ret = io_setup(MAX_AIO_REQS, &prv->aio_ctx);
69 - if (prv->poll_fd < 0) {
73 DPRINTF("Couldn't setup AIO context. If you are "
74 "trying to concurrently use a large number "
76 "(e.g. 'echo echo 1048576 > /proc/sys/fs/"
79 - DPRINTF("Couldn't get fd for AIO poll support. This "
80 - "is probably because your kernel does not "
81 - "have the aio-poll patch applied.\n");
82 + DPRINTF("Couldn't setup AIO context.\n");
90 + pipe(prv->command_fd);
91 + pipe(prv->completion_fd);
93 + ret = pthread_create(&prv->aio_thread, NULL,
94 + tdaio_completion_thread, prv);
96 + write(prv->command_fd[1], &ret, sizeof(ret));
99 ret = get_image_info(s, fd);
103 /*initialise the FD array*/
104 for(i=0;i<MAX_IOFD;i++) fds[i] = 0;
106 - fds[0] = prv->poll_fd;
107 + fds[0] = prv->completion_fd[0];
111 @@ -296,17 +299,61 @@
116 + * We don't have any way to do epoll on aio events in a normal kernel, so
117 + * wait for aio events in a separate thread and return completion status
118 + * via a pipe that can be waited on normally.
120 + * To keep locking problems between the completion thread and the submit
121 + * thread to a minimum, there's a handshake which allows only one thread
122 + * to be doing work on the completion queue at a time:
124 + * 1) main thread sends completion thread a command via the command pipe;
125 + * 2) completion thread waits for aio events and returns the number
126 + * received on the completion pipe
127 + * 3) main thread processes the received prv->aio_events events
128 + * 4) loop back to 1) to let the completion thread refill the aio_events
131 + * This workaround needs to disappear once the kernel provides a single
132 + * mechanism for waiting on both aio and normal fd wakeups.
135 +static void *tdaio_completion_thread(void *arg)
137 + struct tdaio_state *prv = (struct tdaio_state *) arg;
143 + rc = read(prv->command_fd[0], &command, sizeof(command));
146 + /* Non-blocking test for completed io. */
147 + rc = io_getevents(prv->aio_ctx, 0,
148 + MAX_AIO_REQS, prv->aio_events,
152 + rc = write(prv->completion_fd[1], &nr_events,
153 + sizeof(nr_events));
159 int tdaio_do_callbacks(struct td_state *s, int sid)
161 - int ret, i, rsp = 0;
162 + int ret, i, nr_events, rsp = 0;
164 struct tdaio_state *prv = (struct tdaio_state *)s->private;
166 /* Non-blocking test for completed io. */
167 - ret = io_getevents(prv->aio_ctx, 0, MAX_AIO_REQS, prv->aio_events,
170 - for (ep=prv->aio_events,i=ret; i-->0; ep++) {
171 + ret = read(prv->completion_fd[0], &nr_events, sizeof(nr_events));
174 + for (ep=prv->aio_events,i=nr_events; i-->0; ep++) {
175 struct iocb *io = ep->obj;
176 struct pending_aio *pio;
180 prv->iocb_free[prv->iocb_free_count++] = io;
184 + nr_events = io_getevents(prv->aio_ctx, 0,
185 + MAX_AIO_REQS, prv->aio_events,
190 + write(prv->command_fd[1], &nr_events, sizeof(nr_events));
195 --- xen-unstable-11539/tools/blktap/drivers/block-aio.c.~1~ 2006-09-25 19:27:39.000000000 +0100
196 +++ xen-unstable-11539/tools/blktap/drivers/block-aio.c 2006-09-25 20:00:24.000000000 +0100
200 /* Non-blocking test for completed io. */
201 - rc = io_getevents(prv->aio_ctx, 0,
202 + rc = io_getevents(prv->aio_ctx, 1,
203 MAX_AIO_REQS, prv->aio_events,