Description: Make in-order command queues actually be in-order

When beignet added out-of-order execution support (7fd45f15),
it made *all* queues use it, even ones that are nominally in-order.

While using out-of-order queues is probably a good idea when possible
(for performance), the OpenCL spec does not allow it to be the default.

Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009213.html

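Illustration only, not part of the patch: out-of-order execution is something an
application must request explicitly when creating a queue, so a queue created without
the flag has to run commands in submission order. A minimal sketch of that API
distinction, assuming an already-created context and device:

  #include <CL/cl.h>

  /* Sketch only: error handling omitted. */
  static void create_queues(cl_context context, cl_device_id device)
  {
    cl_int err;

    /* No properties requested: the queue is in-order by default.
       That default behaviour is what the patch below restores in beignet. */
    cl_command_queue in_order =
      clCreateCommandQueueWithProperties(context, device, NULL, &err);

    /* Out-of-order execution must be requested explicitly. */
    cl_queue_properties props[] =
      {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};
    cl_command_queue out_of_order =
      clCreateCommandQueueWithProperties(context, device, props, &err);

    clReleaseCommandQueue(in_order);
    clReleaseCommandQueue(out_of_order);
  }
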
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -276,7 +276,7 @@ clEnqueueSVMFree (cl_command_queue command_queue,
     data->size = num_svm_pointers;
     data->ptr = user_data;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -422,7 +422,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
     data->const_ptr = src_ptr;
     data->size = size;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -434,6 +434,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_copy) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while(0);

@@ -511,7 +514,7 @@ cl_int clEnqueueSVMMemFill (cl_command_queue command_queue,
     data->pattern_size = pattern_size;
     data->size = size;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
   count *= global_wk_sz_rem[2] ? 2 : 1;

   const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
+  cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue);
   /* Go through the at most 8 cases and euque if there is work items left */
   for (i = 0; i < 2; i++) {
     for (j = 0; j < 2; j++) {
@@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
         break;
       }

-      err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
+      err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
       if (err != CL_SUCCESS) {
         break;
       }
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -308,7 +308,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
     if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
       data->write_map = 1;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -321,6 +321,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
       }

       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_map) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }

     ptr = data->ptr;
@@ -393,7 +396,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
     data->mem_obj = memobj;
     data->ptr = mapped_ptr;

-    if (e_status == CL_COMPLETE) { // No need to wait
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
         break;
@@ -495,7 +498,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
     data->offset = offset;
     data->size = size;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -507,6 +510,9 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_read) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -598,7 +604,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
     data->offset = offset;
     data->size = size;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -610,6 +616,9 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_write) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -747,7 +756,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
     data->host_row_pitch = host_row_pitch;
     data->host_slice_pitch = host_slice_pitch;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -759,6 +768,9 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_read) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -898,7 +910,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
     data->host_row_pitch = host_row_pitch;
     data->host_slice_pitch = host_slice_pitch;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -910,6 +922,9 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_write) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -1017,7 +1032,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -1207,7 +1222,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue,
     if (e_status < CL_COMPLETE) { // Error happend, cancel.
       err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
       break;
-    } else if (e_status == CL_COMPLETE) {
+    } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
      err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE);
       if (err != CL_SUCCESS) {
         break;
@@ -1308,7 +1323,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -1395,7 +1410,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -1574,7 +1589,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
     if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
       data->write_map = 1;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -1587,6 +1602,9 @@ clEnqueueMapImage(cl_command_queue command_queue,
       }

       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_map) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }

     ptr = data->ptr;
@@ -1764,7 +1782,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
     data->row_pitch = row_pitch;
     data->slice_pitch = slice_pitch;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -1776,6 +1794,9 @@ clEnqueueReadImage(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_read) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -1916,7 +1937,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
     data->row_pitch = row_pitch;
     data->slice_pitch = slice_pitch;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -1928,6 +1949,9 @@ clEnqueueWriteImage(cl_command_queue command_queue,
         break;
       }
       cl_command_queue_enqueue_event(command_queue, e);
+      if (blocking_write) {
+        cl_event_wait_for_events_list(1, &e);
+      }
     }
   } while (0);

@@ -2062,7 +2086,7 @@ clEnqueueCopyImage(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -2173,7 +2197,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -2285,7 +2309,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
@@ -2395,7 +2419,7 @@ clEnqueueFillImage(cl_command_queue command_queue,
       break;
     }

-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
     if (err != CL_SUCCESS) {
       break;
     }
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_finish(cl_command_queue queue);
 extern cl_int cl_command_queue_wait_flush(cl_command_queue queue);
 /* Note: Must call this function with queue's lock. */
 extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num);
+/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */
+static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){
+  return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */
+    || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */
+}

 #endif /* __CL_COMMAND_QUEUE_H__ */

--- a/src/cl_command_queue_enqueue.c
+++ b/src/cl_command_queue_enqueue.c
@@ -65,6 +65,8 @@ worker_thread_function(void *Arg)
       if (cl_event_is_ready(e) <= CL_COMPLETE) {
         list_node_del(&e->enqueue_node);
         list_add_tail(&ready_list, &e->enqueue_node);
+      } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){
+        break; /* in in-order mode, can't skip over non-ready events */
       }
     }

@@ -80,18 +82,20 @@ worker_thread_function(void *Arg)
     CL_OBJECT_UNLOCK(queue);

     /* Do the really job without lock.*/
-    exec_status = CL_SUBMITTED;
-    list_for_each_safe(pos, n, &ready_list)
-    {
-      e = list_entry(pos, _cl_event, enqueue_node);
-      cl_event_exec(e, exec_status, CL_FALSE);
-    }
+    if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* in in-order mode, need to get each all the way to CL_COMPLETE before starting the next one */
+      exec_status = CL_SUBMITTED;
+      list_for_each_safe(pos, n, &ready_list)
+      {
+        e = list_entry(pos, _cl_event, enqueue_node);
+        cl_event_exec(e, exec_status, CL_FALSE);
+      }

-    /* Notify all waiting for flush. */
-    CL_OBJECT_LOCK(queue);
-    worker->in_exec_status = CL_SUBMITTED;
-    CL_OBJECT_NOTIFY_COND(queue);
-    CL_OBJECT_UNLOCK(queue);
+      /* Notify all waiting for flush. */
+      CL_OBJECT_LOCK(queue);
+      worker->in_exec_status = CL_SUBMITTED;
+      CL_OBJECT_NOTIFY_COND(queue);
+      CL_OBJECT_UNLOCK(queue);
+    }

     list_for_each_safe(pos, n, &ready_list)
     {
--- a/src/cl_gl_api.c
+++ b/src/cl_gl_api.c
@@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue,
     data = &e->exec_data;
     data->type = EnqueueReturnSuccesss;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {
@@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue,
     data = &e->exec_data;
     data->type = EnqueueReturnSuccesss;

-    if (e_status == CL_COMPLETE) {
+    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
       // Sync mode, no need to queue event.
       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
       if (err != CL_SUCCESS) {