]> git.pld-linux.org Git - packages/beignet.git/blob - beignet-in-order-queue.patch
- added dirty llvm11-support patch, now at least builds with llvm 11
[packages/beignet.git] / beignet-in-order-queue.patch
1 Description: Make in-order command queues actually be in-order
2
3 When beignet added out-of-order execution support (7fd45f15),
4 it made *all* queues use it, even ones that are nominally in-order.
5
6 While using out-of-order queues is probably a good idea when possible
7 (for performance), the OpenCL spec does not allow it to be the default.
8
9 Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
10 Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009213.html
11
12 --- a/src/cl_api.c
13 +++ b/src/cl_api.c
14 @@ -276,7 +276,7 @@ clEnqueueSVMFree (cl_command_queue command_queue,
15      data->size      = num_svm_pointers;
16      data->ptr       = user_data;
17  
18 -    if (e_status == CL_COMPLETE) {
19 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
20        // Sync mode, no need to queue event.
21        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
22        if (err != CL_SUCCESS) {
23 @@ -422,7 +422,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
24      data->const_ptr    = src_ptr;
25      data->size         = size;
26  
27 -    if (e_status == CL_COMPLETE) {
28 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
29        // Sync mode, no need to queue event.
30        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
31        if (err != CL_SUCCESS) {
32 @@ -434,6 +434,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
33          break;
34        }
35        cl_command_queue_enqueue_event(command_queue, e);
36 +      if (blocking_copy) {
37 +        cl_event_wait_for_events_list(1, &e);
38 +      }
39      }
40    } while(0);
41  
42 @@ -511,7 +514,7 @@ cl_int clEnqueueSVMMemFill (cl_command_queue command_queue,
43      data->pattern_size = pattern_size;
44      data->size         = size;
45  
46 -    if (e_status == CL_COMPLETE) {
47 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
48        // Sync mode, no need to queue event.
49        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
50        if (err != CL_SUCCESS) {
51 --- a/src/cl_api_kernel.c
52 +++ b/src/cl_api_kernel.c
53 @@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
54      count *= global_wk_sz_rem[2] ? 2 : 1;
55  
56      const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
57 +    cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue);
58      /* Go through the at most 8 cases and euque if there is work items left */
59      for (i = 0; i < 2; i++) {
60        for (j = 0; j < 2; j++) {
61 @@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
62              break;
63            }
64  
65 -          err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
66 +          err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
67            if (err != CL_SUCCESS) {
68              break;
69            }
70 --- a/src/cl_api_mem.c
71 +++ b/src/cl_api_mem.c
72 @@ -308,7 +308,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
73      if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
74        data->write_map = 1;
75  
76 -    if (e_status == CL_COMPLETE) {
77 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
78        // Sync mode, no need to queue event.
79        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
80        if (err != CL_SUCCESS) {
81 @@ -321,6 +321,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
82        }
83  
84        cl_command_queue_enqueue_event(command_queue, e);
85 +      if (blocking_map) {
86 +        cl_event_wait_for_events_list(1, &e);
87 +      }
88      }
89  
90      ptr = data->ptr;
91 @@ -393,7 +396,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
92      data->mem_obj = memobj;
93      data->ptr = mapped_ptr;
94  
95 -    if (e_status == CL_COMPLETE) { // No need to wait
96 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait
97        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
98        if (err != CL_SUCCESS) {
99          break;
100 @@ -495,7 +498,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
101      data->offset = offset;
102      data->size = size;
103  
104 -    if (e_status == CL_COMPLETE) {
105 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
106        // Sync mode, no need to queue event.
107        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
108        if (err != CL_SUCCESS) {
109 @@ -507,6 +510,9 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
110          break;
111        }
112        cl_command_queue_enqueue_event(command_queue, e);
113 +      if (blocking_read) {
114 +        cl_event_wait_for_events_list(1, &e);
115 +      }
116      }
117    } while (0);
118  
119 @@ -598,7 +604,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
120      data->offset = offset;
121      data->size = size;
122  
123 -    if (e_status == CL_COMPLETE) {
124 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
125        // Sync mode, no need to queue event.
126        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
127        if (err != CL_SUCCESS) {
128 @@ -610,6 +616,9 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
129          break;
130        }
131        cl_command_queue_enqueue_event(command_queue, e);
132 +      if (blocking_write) {
133 +        cl_event_wait_for_events_list(1, &e);
134 +      }
135      }
136    } while (0);
137  
138 @@ -747,7 +756,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
139      data->host_row_pitch = host_row_pitch;
140      data->host_slice_pitch = host_slice_pitch;
141  
142 -    if (e_status == CL_COMPLETE) {
143 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
144        // Sync mode, no need to queue event.
145        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
146        if (err != CL_SUCCESS) {
147 @@ -759,6 +768,9 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
148          break;
149        }
150        cl_command_queue_enqueue_event(command_queue, e);
151 +      if (blocking_read) {
152 +        cl_event_wait_for_events_list(1, &e);
153 +      }
154      }
155    } while (0);
156  
157 @@ -898,7 +910,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
158      data->host_row_pitch = host_row_pitch;
159      data->host_slice_pitch = host_slice_pitch;
160  
161 -    if (e_status == CL_COMPLETE) {
162 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
163        // Sync mode, no need to queue event.
164        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
165        if (err != CL_SUCCESS) {
166 @@ -910,6 +922,9 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
167          break;
168        }
169        cl_command_queue_enqueue_event(command_queue, e);
170 +      if (blocking_write) {
171 +        cl_event_wait_for_events_list(1, &e);
172 +      }
173      }
174    } while (0);
175  
176 @@ -1017,7 +1032,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
177        break;
178      }
179  
180 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
181 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
182      if (err != CL_SUCCESS) {
183        break;
184      }
185 @@ -1207,7 +1222,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue,
186      if (e_status < CL_COMPLETE) { // Error happend, cancel.
187        err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
188        break;
189 -    } else if (e_status == CL_COMPLETE) {
190 +    } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
191        err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE);
192        if (err != CL_SUCCESS) {
193          break;
194 @@ -1308,7 +1323,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue,
195        break;
196      }
197  
198 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
199 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
200      if (err != CL_SUCCESS) {
201        break;
202      }
203 @@ -1395,7 +1410,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue,
204        break;
205      }
206  
207 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
208 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
209      if (err != CL_SUCCESS) {
210        break;
211      }
212 @@ -1574,7 +1589,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
213      if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
214        data->write_map = 1;
215  
216 -    if (e_status == CL_COMPLETE) {
217 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
218        // Sync mode, no need to queue event.
219        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
220        if (err != CL_SUCCESS) {
221 @@ -1587,6 +1602,9 @@ clEnqueueMapImage(cl_command_queue command_queue,
222        }
223  
224        cl_command_queue_enqueue_event(command_queue, e);
225 +      if (blocking_map) {
226 +        cl_event_wait_for_events_list(1, &e);
227 +      }
228      }
229  
230      ptr = data->ptr;
231 @@ -1764,7 +1782,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
232      data->row_pitch = row_pitch;
233      data->slice_pitch = slice_pitch;
234  
235 -    if (e_status == CL_COMPLETE) {
236 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
237        // Sync mode, no need to queue event.
238        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
239        if (err != CL_SUCCESS) {
240 @@ -1776,6 +1794,9 @@ clEnqueueReadImage(cl_command_queue command_queue,
241          break;
242        }
243        cl_command_queue_enqueue_event(command_queue, e);
244 +      if (blocking_read) {
245 +        cl_event_wait_for_events_list(1, &e);
246 +      }
247      }
248    } while (0);
249  
250 @@ -1916,7 +1937,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
251      data->row_pitch = row_pitch;
252      data->slice_pitch = slice_pitch;
253  
254 -    if (e_status == CL_COMPLETE) {
255 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
256        // Sync mode, no need to queue event.
257        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
258        if (err != CL_SUCCESS) {
259 @@ -1928,6 +1949,9 @@ clEnqueueWriteImage(cl_command_queue command_queue,
260          break;
261        }
262        cl_command_queue_enqueue_event(command_queue, e);
263 +      if (blocking_write) {
264 +        cl_event_wait_for_events_list(1, &e);
265 +      }
266      }
267    } while (0);
268  
269 @@ -2062,7 +2086,7 @@ clEnqueueCopyImage(cl_command_queue command_queue,
270        break;
271      }
272  
273 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
274 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
275      if (err != CL_SUCCESS) {
276        break;
277      }
278 @@ -2173,7 +2197,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
279        break;
280      }
281  
282 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
283 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
284      if (err != CL_SUCCESS) {
285        break;
286      }
287 @@ -2285,7 +2309,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue,
288        break;
289      }
290  
291 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
292 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
293      if (err != CL_SUCCESS) {
294        break;
295      }
296 @@ -2395,7 +2419,7 @@ clEnqueueFillImage(cl_command_queue command_queue,
297        break;
298      }
299  
300 -    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
301 +    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
302      if (err != CL_SUCCESS) {
303        break;
304      }
305 --- a/src/cl_command_queue.h
306 +++ b/src/cl_command_queue.h
307 @@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_finish(cl_command_queue queue);
308  extern cl_int cl_command_queue_wait_flush(cl_command_queue queue);
309  /* Note: Must call this function with queue's lock. */
310  extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num);
311 +/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */
312 +static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){
313 +  return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */
314 +    || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */
315 +}
316  
317  #endif /* __CL_COMMAND_QUEUE_H__ */
318  
319 --- a/src/cl_command_queue_enqueue.c
320 +++ b/src/cl_command_queue_enqueue.c
321 @@ -65,6 +65,8 @@ worker_thread_function(void *Arg)
322        if (cl_event_is_ready(e) <= CL_COMPLETE) {
323          list_node_del(&e->enqueue_node);
324          list_add_tail(&ready_list, &e->enqueue_node);
325 +      } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){
326 +        break; /* in in-order mode, can't skip over non-ready events */
327        }
328      }
329  
330 @@ -80,18 +82,20 @@ worker_thread_function(void *Arg)
331      CL_OBJECT_UNLOCK(queue);
332  
333      /* Do the really job without lock.*/
334 -    exec_status = CL_SUBMITTED;
335 -    list_for_each_safe(pos, n, &ready_list)
336 -    {
337 -      e = list_entry(pos, _cl_event, enqueue_node);
338 -      cl_event_exec(e, exec_status, CL_FALSE);
339 -    }
 340 +    if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* out-of-order only: batch-submit all ready events; in-order queues skip this, as each event must reach CL_COMPLETE before the next starts */
341 +      exec_status = CL_SUBMITTED;
342 +      list_for_each_safe(pos, n, &ready_list)
343 +      {
344 +        e = list_entry(pos, _cl_event, enqueue_node);
345 +        cl_event_exec(e, exec_status, CL_FALSE);
346 +      }
347  
348 -    /* Notify all waiting for flush. */
349 -    CL_OBJECT_LOCK(queue);
350 -    worker->in_exec_status = CL_SUBMITTED;
351 -    CL_OBJECT_NOTIFY_COND(queue);
352 -    CL_OBJECT_UNLOCK(queue);
353 +      /* Notify all waiting for flush. */
354 +      CL_OBJECT_LOCK(queue);
355 +      worker->in_exec_status = CL_SUBMITTED;
356 +      CL_OBJECT_NOTIFY_COND(queue);
357 +      CL_OBJECT_UNLOCK(queue);
358 +    }
359  
360      list_for_each_safe(pos, n, &ready_list)
361      {
362 --- a/src/cl_gl_api.c
363 +++ b/src/cl_gl_api.c
364 @@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue,
365      data = &e->exec_data;
366      data->type = EnqueueReturnSuccesss;
367  
368 -    if (e_status == CL_COMPLETE) {
369 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
370        // Sync mode, no need to queue event.
371        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
372        if (err != CL_SUCCESS) {
373 @@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue,
374      data = &e->exec_data;
375      data->type = EnqueueReturnSuccesss;
376  
377 -    if (e_status == CL_COMPLETE) {
378 +    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
379        // Sync mode, no need to queue event.
380        err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
381        if (err != CL_SUCCESS) {
This page took 0.101688 seconds and 3 git commands to generate.