summaryrefslogtreecommitdiff
blob: 9331e9f4bb9525cb828a926127021d3f3fce44d9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
Subject: AIO/POLL interaction
From: http://xenbits.xensource.com/xen-3.1-testing.hg (tip 15042)
Acked-by: jbeulich@novell.com

---
 fs/aio.c            |  120 ++++++++++++++++++++++++++++++++++++++++++++++++----
 include/linux/aio.h |    5 ++
 2 files changed, 116 insertions(+), 9 deletions(-)

--- a/fs/aio.c	2007-08-27 12:09:26.000000000 -0400
+++ b/fs/aio.c	2007-08-27 14:01:24.000000000 -0400
@@ -36,6 +36,11 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_EPOLL
+#include <linux/poll.h>
+#include <linux/anon_inodes.h>
+#endif
+
 #if DEBUG > 1
 #define dprintk		printk
 #else
@@ -1009,6 +1014,11 @@ put_rq:
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
+#ifdef CONFIG_EPOLL
+	if (ctx->file && waitqueue_active(&ctx->poll_wait))
+		wake_up(&ctx->poll_wait);
+#endif
+
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
@@ -1016,6 +1026,8 @@ put_rq:
 /* aio_read_evt
  *	Pull an event off of the ioctx's event ring.  Returns the number of 
  *	events fetched (0 or 1 ;-)
+ *	If ent is NULL, no event is consumed; just returns the number of
+ *	events that would be fetched.
  *	FIXME: make this use cmpxchg.
  *	TODO: make the ringbuffer user mmap()able (requires FIXME).
  */
@@ -1038,13 +1050,18 @@ static int aio_read_evt(struct kioctx *i
 
 	head = ring->head % info->nr;
 	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
-		*ent = *evp;
-		head = (head + 1) % info->nr;
-		smp_mb(); /* finish reading the event before updatng the head */
-		ring->head = head;
-		ret = 1;
-		put_aio_ring_event(evp, KM_USER1);
+		if (ent) { /* event requested */
+			struct io_event *evp =
+				aio_ring_event(info, head, KM_USER1);
+			*ent = *evp;
+			head = (head + 1) % info->nr;
+			/* finish reading the event before updating the head */
+			smp_mb();
+			ring->head = head;
+			ret = 1;
+			put_aio_ring_event(evp, KM_USER1);
+		} else /* only need to know availability */
+			ret = 1;
 	}
 	spin_unlock(&info->ring_lock);
 
@@ -1227,9 +1244,78 @@ static void io_destroy(struct kioctx *io
 
 	aio_cancel_all(ioctx);
 	wait_for_all_aios(ioctx);
+#ifdef CONFIG_EPOLL
+	/* forget the poll file, but it's up to the user to close it */
+	if (ioctx->file) {
+		ioctx->file->private_data = 0;
+		ioctx->file = 0;
+	}
+#endif
 	put_ioctx(ioctx);	/* once for the lookup */
 }
 
+#ifdef CONFIG_EPOLL
+/* Release hook of the aio poll fd: detach the file from its io context. */
+static int aio_queue_fd_close(struct inode *inode, struct file *file)
+{
+	struct kioctx *ioctx = file->private_data;
+	if (ioctx) {	/* NULL once io_destroy() has forgotten the file */
+		file->private_data = NULL;
+		spin_lock_irq(&ioctx->ctx_lock);
+		ioctx->file = NULL;
+		spin_unlock_irq(&ioctx->ctx_lock);
+	}
+	return 0;
+}
+
+/* Poll hook of the aio fd: POLLIN | POLLRDNORM once an event is queued. */
+static unsigned int aio_queue_fd_poll(struct file *file, poll_table *wait)
+{
+	unsigned int pollflags = 0;
+	struct kioctx *ioctx = file->private_data;
+
+	if (ioctx) {
+		/* Register unlocked: poll_wait() may sleep to allocate */
+		poll_wait(file, &ioctx->poll_wait, wait);
+		/* Check our condition: any event waiting in the ring? */
+		spin_lock_irq(&ioctx->ctx_lock);
+		if (aio_read_evt(ioctx, NULL))
+			pollflags = POLLIN | POLLRDNORM;
+		spin_unlock_irq(&ioctx->ctx_lock);
+	}
+
+	return pollflags;
+}
+
+static const struct file_operations aioq_fops = {
+	.release	= aio_queue_fd_close,	/* detach fd from its ioctx */
+	.poll		= aio_queue_fd_poll	/* POLLIN when events queued */
+};
+
+/* make_aio_fd:
+ *  Create a file descriptor that can be used to poll the event queue.
+ *  Returns the new fd on success, else the error from anon_inode_getfd().
+ *  Based on the excellent epoll code.
+ */
+static int make_aio_fd(struct kioctx *ioctx)
+{
+	int error, fd;
+	struct inode *inode;
+	struct file *file;
+
+	/* must be ready before the fd and ioctx->file become visible */
+	init_waitqueue_head(&ioctx->poll_wait);
+	error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
+				 &aioq_fops, ioctx);
+	if (error)
+		return error;
+
+	/* associate the file with the IO context */
+	ioctx->file = file;
+	return fd;
+}
+#endif
+
+
 /* sys_io_setup:
  *	Create an aio_context capable of receiving at least nr_events.
  *	ctxp must not point to an aio_context that already exists, and
@@ -1242,18 +1328,30 @@ static void io_destroy(struct kioctx *io
  *	resources are available.  May fail with -EFAULT if an invalid
  *	pointer is passed for ctxp.  Will fail with -ENOSYS if not
  *	implemented.
+ *
+ *	To request a selectable fd, the user context has to be initialized
+ *	to 1, instead of 0, and the return value is the fd.
+ *	This keeps the system call compatible, since a non-zero value
+ *	was not allowed so far.
  */
 asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp)
 {
 	struct kioctx *ioctx = NULL;
 	unsigned long ctx;
 	long ret;
+	int make_fd = 0;
 
 	ret = get_user(ctx, ctxp);
 	if (unlikely(ret))
 		goto out;
 
 	ret = -EINVAL;
+#ifdef CONFIG_EPOLL
+	if (ctx == 1) {
+		make_fd = 1;
+		ctx = 0;
+	}
+#endif
 	if (unlikely(ctx || nr_events == 0)) {
 		pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n",
 		         ctx, nr_events);
@@ -1264,8 +1362,12 @@ asmlinkage long sys_io_setup(unsigned nr
 	ret = PTR_ERR(ioctx);
 	if (!IS_ERR(ioctx)) {
 		ret = put_user(ioctx->user_id, ctxp);
-		if (!ret)
-			return 0;
+#ifdef CONFIG_EPOLL
+		if (make_fd && ret >= 0)
+			ret = make_aio_fd(ioctx);
+#endif
+		if (ret >= 0)
+			return ret;
 
 		get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
 		io_destroy(ioctx);
--- a/include/linux/aio.h	2007-08-27 12:09:26.000000000 -0400
+++ b/include/linux/aio.h	2007-08-27 14:01:24.000000000 -0400
@@ -201,6 +201,11 @@ struct kioctx {
 	struct aio_ring_info	ring_info;
 
 	struct delayed_work	wq;
+#ifdef CONFIG_EPOLL
+	/* poll integration: wait queue and file of the pollable aio fd */
+	wait_queue_head_t       poll_wait;
+	struct file		*file;
+#endif
 };
 
 /* prototypes */