- merged some Debian patches

author Jakub Bogusz <qboosh@pld-linux.org>

Wed, 19 May 2021 04:35:54 +0000 (06:35 +0200)

committer Jakub Bogusz <qboosh@pld-linux.org>

Wed, 19 May 2021 04:36:04 +0000 (06:36 +0200)
author Jakub Bogusz <qboosh@pld-linux.org>
Wed, 19 May 2021 04:35:54 +0000 (06:35 +0200)
committer Jakub Bogusz <qboosh@pld-linux.org>
Wed, 19 May 2021 04:36:04 +0000 (06:36 +0200)
diff --git a/beignet-accept-ignore--g.patch b/beignet-accept-ignore--g.patch

new file mode 100644 (file)

index 0000000..8a88d96
--- /dev/null
+++ b/beignet-accept-ignore--g.patch
@@ -0,0 +1,21 @@
+Description: Don't error out when -g is passed
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Bug-Debian: https://bugs.debian.org/881054
+Forwarded: https://lists.freedesktop.org/archives/beignet/2019-February/009228.html
+
+--- a/backend/src/backend/program.cpp
++++ b/backend/src/backend/program.cpp
+@@ -985,6 +985,12 @@ EXTEND_QUOTE:
+           continue; // Don't push this str back; ignore it.
+         }
+ 
++        if(str == "-g") {
++            // The OpenCL 2.0 standard requires accepting -g,
++            // but does not require that it actually does anything
++            continue;
++        }
++
+         clOpt.push_back(str);
+       }
+       free(c_str);
diff --git a/beignet-accept-old-create-queue.patch b/beignet-accept-old-create-queue.patch

new file mode 100644 (file)

index 0000000..80f9258
--- /dev/null
+++ b/beignet-accept-old-create-queue.patch
@@ -0,0 +1,48 @@
+Description: Allow clCreateCommandQueue to create out-of-order queues
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009215.html
+
+--- a/src/cl_api_command_queue.c
++++ b/src/cl_api_command_queue.c
+@@ -27,35 +27,11 @@ clCreateCommandQueue(cl_context context,
+                      cl_command_queue_properties properties,
+                      cl_int *errcode_ret)
+ {
+-  cl_command_queue queue = NULL;
+-  cl_int err = CL_SUCCESS;
+-
+-  do {
+-    if (!CL_OBJECT_IS_CONTEXT(context)) {
+-      err = CL_INVALID_CONTEXT;
+-      break;
+-    }
+-
+-    err = cl_devices_list_include_check(context->device_num, context->devices, 1, &device);
+-    if (err)
+-      break;
+-
+-    if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)) {
+-      err = CL_INVALID_VALUE;
+-      break;
+-    }
+-
+-    if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /*not supported now.*/
+-      err = CL_INVALID_QUEUE_PROPERTIES;
+-      break;
+-    }
+-
+-    queue = cl_create_command_queue(context, device, properties, 0, &err);
+-  } while (0);
+-
+-  if (errcode_ret)
+-    *errcode_ret = err;
+-  return queue;
++  cl_queue_properties props[3];
++  props[0] = CL_QUEUE_PROPERTIES;
++  props[1] = properties;
++  props[2] = 0;
++  return clCreateCommandQueueWithProperties(context, device, props, errcode_ret);
+ }
+ 
+ /* 2.0 new API for create command queue. */
diff --git a/beignet-cl_accelerator_intel.patch b/beignet-cl_accelerator_intel.patch

new file mode 100644 (file)

index 0000000..c60dcaa
--- /dev/null
+++ b/beignet-cl_accelerator_intel.patch
@@ -0,0 +1,147 @@
+Description: Move cl_intel_accelerator to cl_intel.h
+
+Beignet's own headers define cl_intel_accelerator etc directly in
+cl_ext.h, but khronos-opencl-headers 2.2~ places these in a separate
+file cl_ext_intel.h, and older versions do not include it at all.
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009217.html
+
+--- a/include/CL/cl_intel.h
++++ b/include/CL/cl_intel.h
+@@ -26,6 +26,115 @@
+ extern "C" {
+ #endif
+ 
++#if defined(__CL_EXT_H) && !defined(cl_intel_accelerator)
++#ifdef CL_VERSION_2_2
++#include "CL/cl_ext_intel.h"
++#else
++/*********************************
++* cl_intel_accelerator extension *
++*********************************/
++#define cl_intel_accelerator 1
++#define cl_intel_motion_estimation 1
++
++typedef struct _cl_accelerator_intel*     cl_accelerator_intel;
++typedef cl_uint                           cl_accelerator_type_intel;
++typedef cl_uint                           cl_accelerator_info_intel;
++
++typedef struct _cl_motion_estimation_desc_intel {
++    cl_uint mb_block_type;
++    cl_uint subpixel_mode;
++    cl_uint sad_adjust_mode;
++    cl_uint search_path_type;
++} cl_motion_estimation_desc_intel;
++
++/* Error Codes */
++#define CL_INVALID_ACCELERATOR_INTEL            -1094
++#define CL_INVALID_ACCELERATOR_TYPE_INTEL       -1095
++#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096
++#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097
++
++/* Deprecated Error Codes */
++#define CL_INVALID_ACCELERATOR_INTEL_DEPRECATED            -6000
++#define CL_INVALID_ACCELERATOR_TYPE_INTEL_DEPRECATED       -6001
++#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL_DEPRECATED -6002
++#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL_DEPRECATED -6003
++
++/* cl_accelerator_type_intel */
++#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL     0x0
++
++/* cl_accelerator_info_intel */
++#define CL_ACCELERATOR_DESCRIPTOR_INTEL                 0x4090
++#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL            0x4091
++#define CL_ACCELERATOR_CONTEXT_INTEL                    0x4092
++#define CL_ACCELERATOR_TYPE_INTEL                       0x4093
++
++/*cl_motion_detect_desc_intel flags */
++#define CL_ME_MB_TYPE_16x16_INTEL                       0x0
++#define CL_ME_MB_TYPE_8x8_INTEL                         0x1
++#define CL_ME_MB_TYPE_4x4_INTEL                         0x2
++
++#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL               0x0
++#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL                  0x1
++#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL                  0x2
++
++#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL                0x0
++#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL                0x1
++
++#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL              0x0
++#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL              0x1
++#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL            0x5
++
++extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
++clCreateAcceleratorINTEL(
++    cl_context                  /* context */,
++    cl_accelerator_type_intel   /* accelerator_type */,
++    size_t                      /* descriptor_size */,
++    const void*                 /* descriptor */,
++    cl_int*                     /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++typedef CL_API_ENTRY cl_accelerator_intel
++    (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
++    cl_context                  /* context */,
++    cl_accelerator_type_intel   /* accelerator_type */,
++    size_t                      /* descriptor_size */,
++    const void*                 /* descriptor */,
++    cl_int*                     /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++extern CL_API_ENTRY cl_int CL_API_CALL
++clGetAcceleratorInfoINTEL
++(
++    cl_accelerator_intel        /* accelerator */,
++    cl_accelerator_info_intel   /* param_name */,
++    size_t                      /* param_value_size */,
++    void*                       /* param_value */,
++    size_t*                     /* param_value_size_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++typedef CL_API_ENTRY cl_int
++    (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
++    cl_accelerator_intel        /* accelerator */,
++    cl_accelerator_info_intel   /* param_name */,
++    size_t                      /* param_value_size */,
++    void*                       /* param_value */,
++    size_t*                     /* param_value_size_ret */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++extern CL_API_ENTRY cl_int CL_API_CALL
++clRetainAcceleratorINTEL(
++    cl_accelerator_intel        /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++typedef CL_API_ENTRY cl_int
++    (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
++    cl_accelerator_intel        /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++extern CL_API_ENTRY cl_int CL_API_CALL
++clReleaseAcceleratorINTEL(
++    cl_accelerator_intel        /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2;
++
++typedef CL_API_ENTRY cl_int
++    (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
++    cl_accelerator_intel        /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2;
++#endif
++#endif
++
+ #define CL_MEM_PINNABLE (1 << 10)
+ 
+ /* Track allocations and report current number of unfreed allocations */
+--- a/src/cl_accelerator_intel.h
++++ b/src/cl_accelerator_intel.h
+@@ -4,6 +4,7 @@
+ #include "cl_base_object.h"
+ #include "CL/cl.h"
+ #include "CL/cl_ext.h"
++#include "CL/cl_intel.h"
+ #include <stdint.h>
+ 
+ struct _cl_accelerator_intel {
+--- a/src/cl_driver.h
++++ b/src/cl_driver.h
+@@ -24,6 +24,7 @@
+ #include <stdlib.h>
+ #include "cl_driver_type.h"
+ #include "CL/cl_ext.h"
++#include "CL/cl_intel.h"
+ /* Various limitations we should remove actually */
+ #define GEN_MAX_SURFACES 256
+ #define GEN_MAX_SAMPLERS 16
diff --git a/beignet-clearer-type-errors.patch b/beignet-clearer-type-errors.patch

new file mode 100644 (file)

index 0000000..09fc534
--- /dev/null
+++ b/beignet-clearer-type-errors.patch
@@ -0,0 +1,118 @@
+Description: More user-friendly "type not supported" errors
+
+(It would be even better if these returned build failure
+rather than asserting, but that's not as easy)
+     
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2017-September/009169.html
+
+--- a/backend/src/backend/gen_insn_selection.cpp
++++ b/backend/src/backend/gen_insn_selection.cpp
+@@ -5253,7 +5253,7 @@ extern bool OCL_DEBUGINFO; // first defi
+           write64Stateless(sel, address, src);
+         sel.pop();
+       } else {
+-        GBE_ASSERT(sel.hasLongType());
++        GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device");
+         write64Stateless(sel, address, src);
+       }
+     }
+@@ -5838,7 +5838,7 @@ extern bool OCL_DEBUGINFO; // first defi
+ 
+         /* The special case, when dst is half, float->word->half will lose accuracy. */
+         if (dstType == TYPE_HALF) {
+-          GBE_ASSERT(sel.hasHalfType());
++          GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device");
+           type = GEN_TYPE_HF;
+         }
+ 
+@@ -5879,7 +5879,7 @@ extern bool OCL_DEBUGINFO; // first defi
+ 
+       if (dstType == TYPE_HALF) {
+         /* There is no MOV for Long <---> Half. So Long-->Float-->half. */
+-        GBE_ASSERT(sel.hasLongType());
++        GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device");
+         GBE_ASSERT(sel.hasHalfType());
+         sel.push();
+         if (sel.isScalarReg(insn.getSrc(0))) {
+@@ -6181,7 +6181,7 @@ extern bool OCL_DEBUGINFO; // first defi
+         }
+       } else if (srcType == ir::TYPE_HALF) {
+         /* No need to consider old platform. if we support half, we must have native long. */
+-        GBE_ASSERT(sel.hasLongType());
++        GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device");
+         GBE_ASSERT(sel.hasHalfType());
+         uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D;
+         GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type);
+@@ -6205,7 +6205,7 @@ extern bool OCL_DEBUGINFO; // first defi
+           sel.MOV(dst, tmp);
+         }
+       } else if (src.type == GEN_TYPE_DF) {
+-        GBE_ASSERT(sel.hasDoubleType());
++        GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device");
+         GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long.
+ 
+         // Just Mov
+@@ -6224,7 +6224,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+ 
+-      GBE_ASSERT(sel.hasDoubleType());
++      GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')");
+ 
+       if (sel.isScalarReg(insn.getDst(0))) {
+         // dst is scalar, just MOV and nothing more.
+@@ -6263,7 +6263,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+ 
+-      GBE_ASSERT(sel.hasDoubleType());
++      GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')");
+       GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half.
+ 
+       if (sel.isScalarReg(insn.getDst(0))) { // uniform case.
+@@ -6329,7 +6329,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       // Special case, half -> char/short.
+       /* [DevBDW+]:   Format conversion to or from HF (Half Float) must be DWord-aligned and
+          strided by a DWord on the destination. */
+-      GBE_ASSERT(sel.hasHalfType());
++      GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device");
+       GenRegister tmp;
+       sel.push();
+       if (sel.isScalarReg(insn.getSrc(0))) {
+@@ -6361,7 +6361,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       // Special case, char/uchar -> half
+       /* [DevBDW+]:  Format conversion to or from HF (Half Float) must be DWord-aligned and
+          strided by a DWord on the destination. */
+-      GBE_ASSERT(sel.hasHalfType());
++      GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device");
+       GenRegister tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF);
+       sel.push();
+       if (sel.isScalarReg(insn.getSrc(0))) {
+@@ -6383,7 +6383,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+       const RegisterFamily dstFamily = getFamily(dstType);
+ 
+-      GBE_ASSERT(sel.hasDoubleType());
++      GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')");
+       GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half.
+       if (sel.isScalarReg(insn.getDst(0))) {
+         // dst is scalar, just MOV and nothing more.
+@@ -6427,7 +6427,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       const GenRegister dst = sel.selReg(insn.getDst(0), dstType);
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+ 
+-      GBE_ASSERT(sel.hasDoubleType());
++      GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device");
+       GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long.
+       // Just Mov
+       sel.MOV(dst, src);
+@@ -6442,7 +6442,7 @@ extern bool OCL_DEBUGINFO; // first defi
+       const GenRegister src = sel.selReg(insn.getSrc(0), srcType);
+       const RegisterFamily srcFamily = getFamily(srcType);
+ 
+-      GBE_ASSERT(sel.hasDoubleType());
++      GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device");
+       GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long.
+ 
+       if (sel.hasLongType() && sel.hasLongRegRestrict()) {
diff --git a/beignet-coffeelake.patch b/beignet-coffeelake.patch

new file mode 100644 (file)

index 0000000..2b609b7
--- /dev/null
+++ b/beignet-coffeelake.patch
@@ -0,0 +1,228 @@
+Description: Enable Coffee Lake support
+
+Little change is needed here because the graphics core is the same as
+Kaby Lake.  Includes all PCI IDs currently supported by the kernel driver
+in the drm-intel tree (Coffee Lake S, H and U devices in GT 1, 2 and 3
+configurations).
+
+Origin: upstream 7e181af2ea4d37f67406f2563c0e13fa1fdbb14b
+Author: Mark Thompson
+
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -209,6 +209,8 @@ namespace gbe {
+       ctx = GBE_NEW(BxtContext, unit, name, deviceID, relaxMath);
+     } else if (IS_KABYLAKE(deviceID)) {
+       ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
++    } else if (IS_COFFEELAKE(deviceID)) {
++      ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
+     } else if (IS_GEMINILAKE(deviceID)) {
+       ctx = GBE_NEW(GlkContext, unit, name, deviceID, relaxMath);
+     }
+@@ -328,6 +330,7 @@ namespace gbe {
+                                       (IS_SKYLAKE(deviceID) && MATCH_SKL_HEADER(binary)) || \
+                                       (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) || \
+                                       (IS_KABYLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
++                                      (IS_COFFEELAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
+                                       (IS_GEMINILAKE(deviceID) && MATCH_GLK_HEADER(binary)) \
+                                       )
+ 
+@@ -436,6 +439,8 @@ namespace gbe {
+         FILL_BXT_HEADER(*binary);
+       }else if(IS_KABYLAKE(prog->deviceID)){
+         FILL_KBL_HEADER(*binary);
++      }else if(IS_COFFEELAKE(prog->deviceID)){
++        FILL_KBL_HEADER(*binary);
+       }else if(IS_GEMINILAKE(prog->deviceID)){
+         FILL_GLK_HEADER(*binary);
+       }else {
+--- a/src/cl_device_data.h
++++ b/src/cl_device_data.h
+@@ -372,7 +372,59 @@
+   (devid == PCI_CHIP_GLK_3x6 ||   \
+    devid == PCI_CHIP_GLK_2x6)
+ 
+-#define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid))
++#define PCI_CHIP_COFFEELAKE_S_GT1_1  0x3E90
++#define PCI_CHIP_COFFEELAKE_S_GT1_2  0x3E93
++#define PCI_CHIP_COFFEELAKE_S_GT1_3  0x3E99
++
++#define PCI_CHIP_COFFEELAKE_U_GT1_1  0x3EA1
++#define PCI_CHIP_COFFEELAKE_U_GT1_2  0x3EA4
++
++#define PCI_CHIP_COFFEELAKE_S_GT2_1  0x3E91
++#define PCI_CHIP_COFFEELAKE_S_GT2_2  0x3E92
++#define PCI_CHIP_COFFEELAKE_S_GT2_3  0x3E96
++#define PCI_CHIP_COFFEELAKE_S_GT2_4  0x3E9A
++
++#define PCI_CHIP_COFFEELAKE_H_GT2_1  0x3E94
++#define PCI_CHIP_COFFEELAKE_H_GT2_2  0x3E9B
++
++#define PCI_CHIP_COFFEELAKE_U_GT2_1  0x3EA0
++#define PCI_CHIP_COFFEELAKE_U_GT2_2  0x3EA3
++#define PCI_CHIP_COFFEELAKE_U_GT2_3  0x3EA9
++
++#define PCI_CHIP_COFFEELAKE_U_GT3_1  0x3EA2
++#define PCI_CHIP_COFFEELAKE_U_GT3_2  0x3EA5
++#define PCI_CHIP_COFFEELAKE_U_GT3_3  0x3EA6
++#define PCI_CHIP_COFFEELAKE_U_GT3_4  0x3EA7
++#define PCI_CHIP_COFFEELAKE_U_GT3_5  0x3EA8
++
++#define IS_CFL_GT1(devid) \
++    (devid == PCI_CHIP_COFFEELAKE_S_GT1_1 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT1_2 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT1_3 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT1_1 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT1_2)
++
++#define IS_CFL_GT2(devid) \
++    (devid == PCI_CHIP_COFFEELAKE_S_GT2_1 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT2_2 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT2_3 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT2_4 || \
++     devid == PCI_CHIP_COFFEELAKE_H_GT2_1 || \
++     devid == PCI_CHIP_COFFEELAKE_H_GT2_2 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT2_1 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT2_2 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT2_3)
++
++#define IS_CFL_GT3(devid) \
++    (devid == PCI_CHIP_COFFEELAKE_U_GT3_1 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT3_2 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT3_3 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT3_4 || \
++     devid == PCI_CHIP_COFFEELAKE_U_GT3_5)
++
++#define IS_COFFEELAKE(devid) (IS_CFL_GT1(devid) || IS_CFL_GT2(devid) || IS_CFL_GT3(devid))
++
++#define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid))
+ 
+ #define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+ 
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -274,6 +274,36 @@ static struct _cl_device_id intel_glk12eu_device = {
+ #include "cl_gen9_device.h"
+ };
+ 
++static struct _cl_device_id intel_cfl_gt1_device = {
++  .max_compute_unit = 12,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 2,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
++static struct _cl_device_id intel_cfl_gt2_device = {
++  .max_compute_unit = 24,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 3,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
++static struct _cl_device_id intel_cfl_gt3_device = {
++  .max_compute_unit = 48,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 6,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
+ LOCAL cl_device_id
+ cl_get_gt_device(cl_device_type device_type)
+ {
+@@ -785,6 +815,64 @@ glk12eu_break:
+       cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+       break;
+ 
++    case PCI_CHIP_COFFEELAKE_S_GT1_1:
++    case PCI_CHIP_COFFEELAKE_S_GT1_2:
++    case PCI_CHIP_COFFEELAKE_S_GT1_3:
++      DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT1");
++    case PCI_CHIP_COFFEELAKE_U_GT1_1:
++    case PCI_CHIP_COFFEELAKE_U_GT1_2:
++      DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT1");
++cfl_gt1_break:
++      intel_cfl_gt1_device.device_id = device_id;
++      intel_cfl_gt1_device.platform = cl_get_platform_default();
++      ret = &intel_cfl_gt1_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
++    case PCI_CHIP_COFFEELAKE_S_GT2_1:
++    case PCI_CHIP_COFFEELAKE_S_GT2_2:
++    case PCI_CHIP_COFFEELAKE_S_GT2_3:
++    case PCI_CHIP_COFFEELAKE_S_GT2_4:
++      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT2");
++    case PCI_CHIP_COFFEELAKE_H_GT2_1:
++    case PCI_CHIP_COFFEELAKE_H_GT2_2:
++      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Halo GT2");
++    case PCI_CHIP_COFFEELAKE_U_GT2_1:
++    case PCI_CHIP_COFFEELAKE_U_GT2_2:
++    case PCI_CHIP_COFFEELAKE_U_GT2_3:
++      DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT2");
++cfl_gt2_break:
++      intel_cfl_gt2_device.device_id = device_id;
++      intel_cfl_gt2_device.platform = cl_get_platform_default();
++      ret = &intel_cfl_gt2_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
++    case PCI_CHIP_COFFEELAKE_U_GT3_1:
++    case PCI_CHIP_COFFEELAKE_U_GT3_2:
++    case PCI_CHIP_COFFEELAKE_U_GT3_3:
++    case PCI_CHIP_COFFEELAKE_U_GT3_4:
++    case PCI_CHIP_COFFEELAKE_U_GT3_5:
++      DECL_INFO_STRING(cfl_gt3_break, intel_cfl_gt3_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT3");
++cfl_gt3_break:
++      intel_cfl_gt3_device.device_id = device_id;
++      intel_cfl_gt3_device.platform = cl_get_platform_default();
++      ret = &intel_cfl_gt3_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
+     case PCI_CHIP_SANDYBRIDGE_BRIDGE:
+     case PCI_CHIP_SANDYBRIDGE_GT1:
+     case PCI_CHIP_SANDYBRIDGE_GT2:
+@@ -992,7 +1080,10 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
+          device == &intel_kbl_gt3_device ||
+          device == &intel_kbl_gt4_device ||
+          device == &intel_glk18eu_device ||
+-         device == &intel_glk12eu_device;
++         device == &intel_glk12eu_device ||
++         device == &intel_cfl_gt1_device ||
++         device == &intel_cfl_gt2_device ||
++         device == &intel_cfl_gt3_device;
+ }
+ 
+ LOCAL cl_int
+@@ -1420,7 +1511,9 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
+         || device == &intel_bxt18eu_device || device == &intel_bxt12eu_device || device == &intel_kbl_gt1_device
+         || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
+         || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device
+-        || device == &intel_glk18eu_device || device == &intel_glk12eu_device) {
++        || device == &intel_glk18eu_device || device == &intel_glk12eu_device
++        || device == &intel_cfl_gt1_device || device == &intel_cfl_gt1_device
++        || device == &intel_cfl_gt3_device) {
+     *ver = 9;
+   } else
+     return CL_INVALID_VALUE;
diff --git a/beignet-cometlake.patch b/beignet-cometlake.patch

new file mode 100644 (file)

index 0000000..0d0817e
--- /dev/null
+++ b/beignet-cometlake.patch
@@ -0,0 +1,289 @@
+Description: Add Comet Lake/Amber Lake/more Coffee Lake support
+
+Minimally tested, I suggest using intel-opencl-icd instead if possible
+
+Author: Ridley Combs
+Bug-Ubuntu: https://launchpad.net/bugs/1905340
+Origin: https://github.com/intel/beignet/pull/20/files
+
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -211,6 +211,10 @@ namespace gbe {
+       ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
+     } else if (IS_COFFEELAKE(deviceID)) {
+       ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
++    } else if (IS_COMETLAKE(deviceID)) {
++      ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
++    } else if (IS_AMBERLAKE(deviceID)) {
++      ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath);
+     } else if (IS_GEMINILAKE(deviceID)) {
+       ctx = GBE_NEW(GlkContext, unit, name, deviceID, relaxMath);
+     }
+@@ -331,6 +335,8 @@ namespace gbe {
+                                       (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) || \
+                                       (IS_KABYLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
+                                       (IS_COFFEELAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
++                                      (IS_COMETLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
++                                      (IS_AMBERLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \
+                                       (IS_GEMINILAKE(deviceID) && MATCH_GLK_HEADER(binary)) \
+                                       )
+ 
+@@ -441,6 +447,10 @@ namespace gbe {
+         FILL_KBL_HEADER(*binary);
+       }else if(IS_COFFEELAKE(prog->deviceID)){
+         FILL_KBL_HEADER(*binary);
++      }else if(IS_COMETLAKE(prog->deviceID)){
++        FILL_KBL_HEADER(*binary);
++      }else if(IS_AMBERLAKE(prog->deviceID)){
++        FILL_KBL_HEADER(*binary);
+       }else if(IS_GEMINILAKE(prog->deviceID)){
+         FILL_GLK_HEADER(*binary);
+       }else {
+--- a/src/cl_device_data.h
++++ b/src/cl_device_data.h
+@@ -376,6 +376,8 @@
+ #define PCI_CHIP_COFFEELAKE_S_GT1_2  0x3E93
+ #define PCI_CHIP_COFFEELAKE_S_GT1_3  0x3E99
+ 
++#define PCI_CHIP_COFFEELAKE_H_GT1_1  0x3E9C
++
+ #define PCI_CHIP_COFFEELAKE_U_GT1_1  0x3EA1
+ #define PCI_CHIP_COFFEELAKE_U_GT1_2  0x3EA4
+ 
+@@ -383,6 +385,7 @@
+ #define PCI_CHIP_COFFEELAKE_S_GT2_2  0x3E92
+ #define PCI_CHIP_COFFEELAKE_S_GT2_3  0x3E96
+ #define PCI_CHIP_COFFEELAKE_S_GT2_4  0x3E9A
++#define PCI_CHIP_COFFEELAKE_S_GT2_5  0x3E98
+ 
+ #define PCI_CHIP_COFFEELAKE_H_GT2_1  0x3E94
+ #define PCI_CHIP_COFFEELAKE_H_GT2_2  0x3E9B
+@@ -401,6 +404,7 @@
+     (devid == PCI_CHIP_COFFEELAKE_S_GT1_1 || \
+      devid == PCI_CHIP_COFFEELAKE_S_GT1_2 || \
+      devid == PCI_CHIP_COFFEELAKE_S_GT1_3 || \
++     devid == PCI_CHIP_COFFEELAKE_H_GT1_1 || \
+      devid == PCI_CHIP_COFFEELAKE_U_GT1_1 || \
+      devid == PCI_CHIP_COFFEELAKE_U_GT1_2)
+ 
+@@ -409,6 +413,7 @@
+      devid == PCI_CHIP_COFFEELAKE_S_GT2_2 || \
+      devid == PCI_CHIP_COFFEELAKE_S_GT2_3 || \
+      devid == PCI_CHIP_COFFEELAKE_S_GT2_4 || \
++     devid == PCI_CHIP_COFFEELAKE_S_GT2_5 || \
+      devid == PCI_CHIP_COFFEELAKE_H_GT2_1 || \
+      devid == PCI_CHIP_COFFEELAKE_H_GT2_2 || \
+      devid == PCI_CHIP_COFFEELAKE_U_GT2_1 || \
+@@ -424,7 +429,65 @@
+ 
+ #define IS_COFFEELAKE(devid) (IS_CFL_GT1(devid) || IS_CFL_GT2(devid) || IS_CFL_GT3(devid))
+ 
+-#define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid))
++#define PCI_CHIP_COMETLAKE_S_GT1_1   0x9BA5
++#define PCI_CHIP_COMETLAKE_S_GT1_2   0x9BA8
++
++#define PCI_CHIP_COMETLAKE_H_GT1_1   0x9BA4
++#define PCI_CHIP_COMETLAKE_H_GT1_2   0x9BA2
++
++#define PCI_CHIP_COMETLAKE_U_GT1_1   0x9B21
++#define PCI_CHIP_COMETLAKE_U_GT1_2   0x9BAA
++#define PCI_CHIP_COMETLAKE_U_GT1_3   0x9BAC
++
++#define PCI_CHIP_COMETLAKE_S_GT2_1   0x9BC5
++#define PCI_CHIP_COMETLAKE_S_GT2_2   0x9BC8
++
++#define PCI_CHIP_COMETLAKE_H_GT2_1   0x9BC4
++#define PCI_CHIP_COMETLAKE_H_GT2_2   0x9BC2
++
++#define PCI_CHIP_COMETLAKE_W_GT2_1   0x9BC6
++#define PCI_CHIP_COMETLAKE_W_GT2_2   0x9BE6
++#define PCI_CHIP_COMETLAKE_W_GT2_3   0x9BF6
++
++#define PCI_CHIP_COMETLAKE_U_GT2_1   0x9B41
++#define PCI_CHIP_COMETLAKE_U_GT2_2   0x9BCA
++#define PCI_CHIP_COMETLAKE_U_GT2_3   0x9BCC
++
++#define IS_CML_GT1(devid) \
++    (devid == PCI_CHIP_COMETLAKE_S_GT1_1   || \
++     devid == PCI_CHIP_COMETLAKE_S_GT1_2   || \
++     devid == PCI_CHIP_COMETLAKE_H_GT1_1   || \
++     devid == PCI_CHIP_COMETLAKE_H_GT1_2   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT1_1   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT1_2   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT1_3)
++
++#define IS_CML_GT2(devid) \
++    (devid == PCI_CHIP_COMETLAKE_S_GT2_1   || \
++     devid == PCI_CHIP_COMETLAKE_S_GT2_2   || \
++     devid == PCI_CHIP_COMETLAKE_H_GT2_1   || \
++     devid == PCI_CHIP_COMETLAKE_H_GT2_2   || \
++     devid == PCI_CHIP_COMETLAKE_W_GT2_1   || \
++     devid == PCI_CHIP_COMETLAKE_W_GT2_2   || \
++     devid == PCI_CHIP_COMETLAKE_W_GT2_3   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT2_1   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT2_2   || \
++     devid == PCI_CHIP_COMETLAKE_U_GT2_3)
++
++#define IS_COMETLAKE(devid) (IS_CML_GT1(devid) || IS_CML_GT2(devid))
++
++#define PCI_CHIP_AMBERLAKE_Y_GT2_1   0x591C
++#define PCI_CHIP_AMBERLAKE_Y_GT2_2   0x87C0
++#define PCI_CHIP_AMBERLAKE_Y_GT2_3   0x87CA
++
++#define IS_AML_GT2(devid) \
++    (devid == PCI_CHIP_AMBERLAKE_Y_GT2_1   || \
++     devid == PCI_CHIP_AMBERLAKE_Y_GT2_2   || \
++     devid == PCI_CHIP_AMBERLAKE_Y_GT2_3)
++
++#define IS_AMBERLAKE(devid) (IS_AML_GT2(devid))
++
++#define IS_GEN9(devid)     (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid) || IS_COMETLAKE(devid) || IS_AMBERLAKE(devid))
+ 
+ #define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120)
+ 
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -304,6 +304,36 @@ static struct _cl_device_id intel_cfl_gt3_device = {
+ #include "cl_gen9_device.h"
+ };
+ 
++static struct _cl_device_id intel_cml_gt1_device = {
++  .max_compute_unit = 12,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 2,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
++static struct _cl_device_id intel_cml_gt2_device = {
++  .max_compute_unit = 24,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 3,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
++static struct _cl_device_id intel_aml_gt2_device = {
++  .max_compute_unit = 24,
++  .max_thread_per_unit = 7,
++  .sub_slice_count = 3,
++  .max_work_item_sizes = {512, 512, 512},
++  .max_work_group_size = 256,
++  .max_clock_frequency = 1000,
++#include "cl_gen9_device.h"
++};
++
+ LOCAL cl_device_id
+ cl_get_gt_device(cl_device_type device_type)
+ {
+@@ -819,6 +849,8 @@ cl_get_gt_device(cl_device_type device_type)
+     case PCI_CHIP_COFFEELAKE_S_GT1_2:
+     case PCI_CHIP_COFFEELAKE_S_GT1_3:
+       DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT1");
++    case PCI_CHIP_COFFEELAKE_H_GT1_1:
++      DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Halo GT1");
+     case PCI_CHIP_COFFEELAKE_U_GT1_1:
+     case PCI_CHIP_COFFEELAKE_U_GT1_2:
+       DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT1");
+@@ -837,6 +869,7 @@ cl_get_gt_device(cl_device_type device_type)
+     case PCI_CHIP_COFFEELAKE_S_GT2_2:
+     case PCI_CHIP_COFFEELAKE_S_GT2_3:
+     case PCI_CHIP_COFFEELAKE_S_GT2_4:
++    case PCI_CHIP_COFFEELAKE_S_GT2_5:
+       DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT2");
+     case PCI_CHIP_COFFEELAKE_H_GT2_1:
+     case PCI_CHIP_COFFEELAKE_H_GT2_2:
+@@ -873,6 +906,67 @@ cl_get_gt_device(cl_device_type device_type)
+       cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+       break;
+ 
++    case PCI_CHIP_COMETLAKE_S_GT1_1:
++    case PCI_CHIP_COMETLAKE_S_GT1_2:
++      DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Desktop GT1");
++    case PCI_CHIP_COMETLAKE_H_GT1_1:
++    case PCI_CHIP_COMETLAKE_H_GT1_2:
++      DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Halo GT1");
++    case PCI_CHIP_COMETLAKE_U_GT1_1:
++    case PCI_CHIP_COMETLAKE_U_GT1_2:
++    case PCI_CHIP_COMETLAKE_U_GT1_3:
++      DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Mobile GT1");
++cml_gt1_break:
++      intel_cml_gt1_device.device_id = device_id;
++      intel_cml_gt1_device.platform = cl_get_platform_default();
++      ret = &intel_cml_gt1_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
++    case PCI_CHIP_COMETLAKE_S_GT2_1:
++    case PCI_CHIP_COMETLAKE_S_GT2_2:
++      DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Desktop GT2");
++    case PCI_CHIP_COMETLAKE_H_GT2_1:
++    case PCI_CHIP_COMETLAKE_H_GT2_2:
++      DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Halo GT2");
++    case PCI_CHIP_COMETLAKE_W_GT2_1:
++    case PCI_CHIP_COMETLAKE_W_GT2_2:
++    case PCI_CHIP_COMETLAKE_W_GT2_3:
++      DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Workstation GT2");
++    case PCI_CHIP_COMETLAKE_U_GT2_1:
++    case PCI_CHIP_COMETLAKE_U_GT2_2:
++    case PCI_CHIP_COMETLAKE_U_GT2_3:
++      DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Mobile GT2");
++cml_gt2_break:
++      intel_cml_gt2_device.device_id = device_id;
++      intel_cml_gt2_device.platform = cl_get_platform_default();
++      ret = &intel_cml_gt2_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
++    case PCI_CHIP_AMBERLAKE_Y_GT2_1:
++    case PCI_CHIP_AMBERLAKE_Y_GT2_2:
++    case PCI_CHIP_AMBERLAKE_Y_GT2_3:
++      DECL_INFO_STRING(aml_gt2_break, intel_aml_gt2_device, name, "Intel(R) UHD Graphics Amber Lake ULX GT2");
++aml_gt2_break:
++      intel_aml_gt2_device.device_id = device_id;
++      intel_aml_gt2_device.platform = cl_get_platform_default();
++      ret = &intel_aml_gt2_device;
++      cl_intel_platform_get_default_extension(ret);
++#ifdef ENABLE_FP64
++      cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
++#endif
++      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
++      break;
++
+     case PCI_CHIP_SANDYBRIDGE_BRIDGE:
+     case PCI_CHIP_SANDYBRIDGE_GT1:
+     case PCI_CHIP_SANDYBRIDGE_GT2:
+@@ -1083,7 +1177,10 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
+          device == &intel_glk12eu_device ||
+          device == &intel_cfl_gt1_device ||
+          device == &intel_cfl_gt2_device ||
+-         device == &intel_cfl_gt3_device;
++         device == &intel_cfl_gt3_device ||
++         device == &intel_cml_gt1_device ||
++         device == &intel_cml_gt2_device ||
++         device == &intel_aml_gt2_device;
+ }
+ 
+ LOCAL cl_int
+@@ -1513,7 +1610,8 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
+         || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device
+         || device == &intel_glk18eu_device || device == &intel_glk12eu_device
+         || device == &intel_cfl_gt1_device || device == &intel_cfl_gt1_device
+-        || device == &intel_cfl_gt3_device) {
++        || device == &intel_cfl_gt3_device || device == &intel_cml_gt1_device
++        || device == &intel_cml_gt2_device || device == &intel_aml_gt2_device) {
+     *ver = 9;
+   } else
+     return CL_INVALID_VALUE;
diff --git a/beignet-debian-885423.patch b/beignet-debian-885423.patch

new file mode 100644 (file)

index 0000000..ac10efa
--- /dev/null
+++ b/beignet-debian-885423.patch
@@ -0,0 +1,83 @@
+Description: Fix self-test fail in some conditions
+
+Reverts upstream 81755054c4c19d821e58456a1a7d601806e60e92
+
+Known triggers are Darktable and the GEGL test suite
+
+Note that this does *not* fix the other Darktable issue
+(https://gitlab.freedesktop.org/beignet/beignet/issues/60): beignet still
+needs to be blacklisted in Darktable (which it is by default).
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Bug: https://gitlab.freedesktop.org/beignet/beignet/issues/7
+Bug-Debian: https://bugs.debian.org/885423
+Forwarded: https://lists.freedesktop.org/archives/beignet/2019-January/009227.html
+
+diff --git b/backend/src/backend/gen_insn_selection_optimize.cpp a/backend/src/backend/gen_insn_selection_optimize.cpp
+index 07547ec4..d2e0fb9b 100644
+--- b/backend/src/backend/gen_insn_selection_optimize.cpp
++++ a/backend/src/backend/gen_insn_selection_optimize.cpp
+@@ -74,7 +74,8 @@ namespace gbe
+                   const GenRegister& replacement) :
+                   insn(insn), intermedia(intermedia), replacement(replacement)
+       {
+-        assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
++        assert(insn.opcode == SEL_OP_MOV);
++        assert(&(insn.src(0)) == &replacement);
+         assert(&(insn.dst(0)) == &intermedia);
+         this->elements = CalculateElements(intermedia, insn.state.execWidth);
+         replacementOverwritten = false;
+@@ -101,7 +102,6 @@ namespace gbe
+     void doReplacement(ReplaceInfo* info);
+     bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var);
+     void cleanReplaceInfoMap();
+-    void doNegAddOptimization(SelectionInstruction &insn);
+ 
+     SelectionBlock &bb;
+     const ir::Liveness::LiveOut& liveout;
+@@ -159,13 +159,8 @@ namespace gbe
+ 
+   void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& insn)
+   {
+-    assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD);
+-    GenRegister &src = insn.src(0);
+-    if (insn.opcode == SEL_OP_ADD) {
+-      if (src.file == GEN_IMMEDIATE_VALUE)
+-        src = insn.src(1);
+-    }
+-
++    assert(insn.opcode == SEL_OP_MOV);
++    const GenRegister& src = insn.src(0);
+     const GenRegister& dst = insn.dst(0);
+     if (src.type != dst.type || src.file != dst.file)
+       return;
+@@ -254,29 +249,10 @@ namespace gbe
+ 
+       if (insn.opcode == SEL_OP_MOV)
+         addToReplaceInfoMap(insn);
+-
+-      doNegAddOptimization(insn);
+     }
+     cleanReplaceInfoMap();
+   }
+ 
+-  /* LLVM transform Mad(a, -b, c) to
+-     Add b, -b, 0
+-     Mad val, a, b, c
+-     for Gen support negtive modifier, mad(a, -b, c) is native suppoted.
+-     Also it can be used for the same like instruction sequence.
+-     Do it just like a:  mov b, -b, so it is a Mov operation like LocalCopyPropagation
+-  */
+-  void SelBasicBlockOptimizer::doNegAddOptimization(SelectionInstruction &insn) {
+-    if (insn.opcode == SEL_OP_ADD) {
+-      GenRegister src0 = insn.src(0);
+-      GenRegister src1 = insn.src(1);
+-      if ((src0.negation && src1.file == GEN_IMMEDIATE_VALUE && src1.value.f == 0.0f) ||
+-          (src1.negation && src0.file == GEN_IMMEDIATE_VALUE && src0.value.f == 0.0f))
+-        addToReplaceInfoMap(insn);
+-    }
+-  }
+-
+   void SelBasicBlockOptimizer::run()
+   {
+     for (size_t i = 0; i < MaxTries; ++i) {
diff --git a/beignet-disable-wayland-warning.patch b/beignet-disable-wayland-warning.patch

new file mode 100644 (file)

index 0000000..46b6a60
--- /dev/null
+++ b/beignet-disable-wayland-warning.patch
@@ -0,0 +1,17 @@
+Description: Disable expected-under-Wayland warning
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Bug-Debian: https://bugs.debian.org/882486
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-January/009182.html
+
+--- beignet-1.3.2.orig/src/intel/intel_driver.c
++++ beignet-1.3.2/src/intel/intel_driver.c
+@@ -235,8 +235,6 @@ if(intel->x11_display) {
+     intel_driver_init_shared(intel, intel->dri_ctx);
+     Xfree(driver_name);
+   }
+-  else
+-    fprintf(stderr, "X server found. dri2 connection failed! \n");
+ }
+ #endif
+ 
diff --git a/beignet-docs-broken-links.patch b/beignet-docs-broken-links.patch

new file mode 100644 (file)

index 0000000..940da1f
--- /dev/null
+++ b/beignet-docs-broken-links.patch
@@ -0,0 +1,63 @@
+Description: Fix broken links and list syntax in documentation
+
+The upstream documentation is meant to be a part of a larger wiki,
+and does not work properly when built as-is on its own
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+
+--- a/docs/Beignet.mdwn
++++ b/docs/Beignet.mdwn
+@@ -11,7 +11,7 @@ about the compiler, please refer to `bac
+ 
+ News
+ ----
+-[[Beignet project news|Beignet/NEWS]]
++[[Beignet project news|NEWS]]
+ 
+ Prerequisite
+ ------------
+@@ -139,6 +139,7 @@ need to refer the "Known Issues" section
+ 
+ Normally, beignet needs to run under X server environment as normal user. If there isn't X server,
+ beignet provides two alternative to run:
++
+ * Run as root without X.
+ * Enable the drm render nodes by passing drm.rnodes=1 to the kernel boot args, then you can run beignet with non-root and without X.
+ 
+@@ -288,15 +289,15 @@ Please specify your hardware when report
+ 
+ Documents for OpenCL application developers
+ -------------------------------------------
+-- [[Cross compile (yocto)|Beignet/howto/cross-compiler-howto]]
+-- [[Work with old system without c++11|Beignet/howto/oldgcc-howto]]
+-- [[Kernel Optimization Guide|Beignet/optimization-guide]]
+-- [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]]
+-- [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]]
++- [[Cross compile (yocto)|howto/cross-compiler-howto]]
++- [[Work with old system without c++11|howto/oldgcc-howto]]
++- [[Kernel Optimization Guide|optimization-guide]]
++- [[Libva Buffer Sharing|howto/libva-buffer-sharing-howto]]
++- [[V4l2 Buffer Sharing|howto/v4l2-buffer-sharing-howto]]
+-- [[OpenGL Buffer Sharing|Beignet/howto/gl-buffer-sharing-howto]]
+-- [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]]
+-- [[Stand Alone Unit Test|Beignet/howto/stand-alone-utest-howto]]
+-- [[Android build|Beignet/howto/android-build-howto]]
++- [[OpenGL Buffer Sharing|howto/gl-buffer-sharing-howto]]
++- [[Video Motion Estimation|howto/video-motion-estimation-howto]]
++- [[Stand Alone Unit Test|howto/stand-alone-utest-howto]]
++- [[Android build|howto/android-build-howto]]
+ 
+ The wiki URL is as below:
+ [http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/)
+--- a/docs/howto/video-motion-estimation-howto.mdwn
++++ b/docs/howto/video-motion-estimation-howto.mdwn
+@@ -66,6 +66,6 @@ Please go through it for details.
+ More references
+ ---------------
+ 
+-https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt
+-https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt
+-https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl
++[https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt](https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt)
++[https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt](https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt)
++[https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl](https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl)
diff --git a/beignet-eventchain-memory-leak.patch b/beignet-eventchain-memory-leak.patch

new file mode 100644 (file)

index 0000000..4b651da
--- /dev/null
+++ b/beignet-eventchain-memory-leak.patch
@@ -0,0 +1,76 @@
+Description: Don't leak memory on long chains of events
+
+Delete event->depend_events when no longer needed to avoid keeping
+the whole dependency tree in memory as long as the final event exists
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Bug-Ubuntu: https://launchpad.net/bugs/1354086
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009209.html
+
+--- a/src/cl_event.c
++++ b/src/cl_event.c
+@@ -184,6 +184,25 @@ cl_event_new(cl_context ctx, cl_command_
+   return e;
+ }
+ 
++/* This exists to prevent long chains of events from filling up memory (https://bugs.launchpad.net/ubuntu/+source/beignet/+bug/1354086).  Call only after the dependencies are complete, or failed and marked as such in this event's status, or when this event is being destroyed */
++LOCAL void
++cl_event_delete_depslist(cl_event event)
++{
++  CL_OBJECT_LOCK(event);
++  cl_event *old_depend_events = event->depend_events;
++  int depend_count = event->depend_event_num;
++  event->depend_event_num = 0;
++  event->depend_events = NULL;
++  CL_OBJECT_UNLOCK(event);
++  if (old_depend_events) {
++    assert(depend_count);
++    for (int i = 0; i < depend_count; i++) {
++      cl_event_delete(old_depend_events[i]);
++    }
++    cl_free(old_depend_events);
++  }
++}
++
+ LOCAL void
+ cl_event_delete(cl_event event)
+ {
+@@ -200,13 +219,7 @@ cl_event_delete(cl_event event)
+ 
+   assert(list_node_out_of_list(&event->enqueue_node));
+ 
+-  if (event->depend_events) {
+-    assert(event->depend_event_num);
+-    for (i = 0; i < event->depend_event_num; i++) {
+-      cl_event_delete(event->depend_events[i]);
+-    }
+-    cl_free(event->depend_events);
+-  }
++  cl_event_delete_depslist(event);
+ 
+   /* Free all the callbacks. Last ref, no need to lock. */
+   while (!list_empty(&event->callbacks)) {
+@@ -566,8 +579,12 @@ cl_event_exec(cl_event event, cl_int exe
+   assert(depend_status <= CL_COMPLETE || ignore_depends || exec_to_status == CL_QUEUED);
+   if (depend_status < CL_COMPLETE) { // Error happend, cancel exec.
+     ret = cl_event_set_status(event, depend_status);
++    cl_event_delete_depslist(event);
+     return depend_status;
+   }
++  if (depend_status == CL_COMPLETE) { // Avoid memory leak
++    cl_event_delete_depslist(event);
++  }
+ 
+   if (cur_status <= exec_to_status) {
+     return ret;
+--- a/src/cl_event.h
++++ b/src/cl_event.h
+@@ -44,7 +44,7 @@ typedef struct _cl_event {
+   cl_command_type event_type; /* Event type. */
+   cl_bool is_barrier;         /* Is this event a barrier */
+   cl_int status;              /* The execution status */
+-  cl_event *depend_events;    /* The events must complete before this. */
++  cl_event *depend_events;    /* The events must complete before this. May disappear after they have completed - see cl_event_delete_depslist*/
+   cl_uint depend_event_num;   /* The depend events number. */
+   list_head callbacks;        /* The events The event callback functions */
+   list_node enqueue_node;     /* The node in the enqueue list. */
diff --git a/beignet-grammar.patch b/beignet-grammar.patch

new file mode 100644 (file)

index 0000000..422e8aa
--- /dev/null
+++ b/beignet-grammar.patch
@@ -0,0 +1,61 @@
+Description: Fix grammar in documentation
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2017-October/009179.html
+
+--- a/docs/Beignet/Backend.mdwn
++++ b/docs/Beignet/Backend.mdwn
+@@ -9,10 +9,10 @@ Status
+ ------
+ 
+ After two years development, beignet is mature now. It now supports all the
+-OpenCL 1.2 mandatory features. Beignet get almost 100% pass rate with both
+-OpenCV 3.0 test suite and the piglit opencl test suite. There are some 
+-performance tuning related items remained, see [[here|Backend/TODO]] for a
+-(incomplete) lists of things to do.
++OpenCL 1.2 mandatory features. Beignet gets almost 100% pass rate with both
++the OpenCV 3.0 test suite and the piglit opencl test suite. There are some 
++performance tuning related items remained, see [[here|Backend/TODO]] for an
++(incomplete) list of things to do.
+ 
+ Interface with the run-time
+ ---------------------------
+@@ -61,7 +61,7 @@ Environment variables are used all over
+ - `OCL_OUTPUT_REG_ALLOC` `(0 or 1)`. Output Gen register allocations, including
+   virtual register to physical register mapping, live ranges.
+ 
+-- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there is any
++- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there are any
+   during CL kernel compiling and linking.
+ 
+ - `OCL_OUTPUT_CFG` `(0 or 1)`. Output control flow graph in .dot file.
+@@ -70,22 +70,22 @@ Environment variables are used all over
+   but without instructions in each BasicBlock.
+ 
+ - `OCL_PRE_ALLOC_INSN_SCHEDULE` `(0 or 1)`. The instruction scheduler in
+-  beignet are currently splitted into two passes: before and after register
+-  allocation. The pre-alloc scheduler tend to decrease register pressure.
++  beignet is currently split into two passes: before and after register
++  allocation. The pre-alloc scheduler tends to decrease register pressure.
+   This variable is used to disable/enable pre-alloc scheduler. This pass is
+   disabled now for some bugs.
+ 
+ - `OCL_POST_ALLOC_INSN_SCHEDULE` `(0 or 1)`. Disable/enable post-alloc
+-  instruction scheduler. The post-alloc scheduler tend to reduce instruction
++  instruction scheduler. The post-alloc scheduler tends to reduce instruction
+   latency. By default, this is enabled now.
+ 
+-- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how much registers can be
+-  spilled under SIMD16. Default value is 16. We find spill too much register
+-  under SIMD16 is not as good as fall back to SIMD8 mode. So we set the
++- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how many registers can be
++  spilled under SIMD16. Default value is 16. We find spilling too many registers
++  under SIMD16 is not as good as falling back to SIMD8 mode. So we set the
+   variable to control spilled register number under SIMD16.
+ 
+ - `OCL_USE_PCH` `(0 or 1)`. The default value is 1. If it is enabled, we use
+-  a pre compiled header file which include all basic ocl headers. This would
++  a pre compiled header file which includes all basic ocl headers. This would
+   reduce the compile time.
+ 
+ Implementation details
diff --git a/beignet-in-order-queue.patch b/beignet-in-order-queue.patch

new file mode 100644 (file)

index 0000000..78168fa
--- /dev/null
+++ b/beignet-in-order-queue.patch
@@ -0,0 +1,381 @@
+Description: Make in-order command queues actually be in-order
+
+When beignet added out-of-order execution support (7fd45f15),
+it made *all* queues use it, even ones that are nominally in-order.
+
+While using out-of-order queues is probably a good idea when possible
+(for performance), the OpenCL spec does not allow it to be the default.
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009213.html
+
+--- a/src/cl_api.c
++++ b/src/cl_api.c
+@@ -276,7 +276,7 @@ clEnqueueSVMFree (cl_command_queue command_queue,
+     data->size      = num_svm_pointers;
+     data->ptr       = user_data;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -422,7 +422,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
+     data->const_ptr    = src_ptr;
+     data->size         = size;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -434,6 +434,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_copy) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while(0);
+ 
+@@ -511,7 +514,7 @@ cl_int clEnqueueSVMMemFill (cl_command_queue command_queue,
+     data->pattern_size = pattern_size;
+     data->size         = size;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+--- a/src/cl_api_kernel.c
++++ b/src/cl_api_kernel.c
+@@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
+     count *= global_wk_sz_rem[2] ? 2 : 1;
+ 
+     const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
++    cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue);
+     /* Go through the at most 8 cases and euque if there is work items left */
+     for (i = 0; i < 2; i++) {
+       for (j = 0; j < 2; j++) {
+@@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
+             break;
+           }
+ 
+-          err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
++          err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE);
+           if (err != CL_SUCCESS) {
+             break;
+           }
+--- a/src/cl_api_mem.c
++++ b/src/cl_api_mem.c
+@@ -308,7 +308,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
+     if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
+       data->write_map = 1;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -321,6 +321,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
+       }
+ 
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_map) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+ 
+     ptr = data->ptr;
+@@ -393,7 +396,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
+     data->mem_obj = memobj;
+     data->ptr = mapped_ptr;
+ 
+-    if (e_status == CL_COMPLETE) { // No need to wait
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+         break;
+@@ -495,7 +498,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
+     data->offset = offset;
+     data->size = size;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -507,6 +510,9 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_read) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -598,7 +604,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
+     data->offset = offset;
+     data->size = size;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -610,6 +616,9 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_write) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -747,7 +756,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
+     data->host_row_pitch = host_row_pitch;
+     data->host_slice_pitch = host_slice_pitch;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -759,6 +768,9 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_read) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -898,7 +910,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
+     data->host_row_pitch = host_row_pitch;
+     data->host_slice_pitch = host_slice_pitch;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -910,6 +922,9 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_write) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -1017,7 +1032,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -1207,7 +1222,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue,
+     if (e_status < CL_COMPLETE) { // Error happend, cancel.
+       err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+       break;
+-    } else if (e_status == CL_COMPLETE) {
++    } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE);
+       if (err != CL_SUCCESS) {
+         break;
+@@ -1308,7 +1323,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -1395,7 +1410,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -1574,7 +1589,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
+     if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
+       data->write_map = 1;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -1587,6 +1602,9 @@ clEnqueueMapImage(cl_command_queue command_queue,
+       }
+ 
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_map) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+ 
+     ptr = data->ptr;
+@@ -1764,7 +1782,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
+     data->row_pitch = row_pitch;
+     data->slice_pitch = slice_pitch;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -1776,6 +1794,9 @@ clEnqueueReadImage(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_read) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -1916,7 +1937,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
+     data->row_pitch = row_pitch;
+     data->slice_pitch = slice_pitch;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -1928,6 +1949,9 @@ clEnqueueWriteImage(cl_command_queue command_queue,
+         break;
+       }
+       cl_command_queue_enqueue_event(command_queue, e);
++      if (blocking_write) {
++        cl_event_wait_for_events_list(1, &e);
++      }
+     }
+   } while (0);
+ 
+@@ -2062,7 +2086,7 @@ clEnqueueCopyImage(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -2173,7 +2197,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -2285,7 +2309,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+@@ -2395,7 +2419,7 @@ clEnqueueFillImage(cl_command_queue command_queue,
+       break;
+     }
+ 
+-    err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
++    err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE);
+     if (err != CL_SUCCESS) {
+       break;
+     }
+--- a/src/cl_command_queue.h
++++ b/src/cl_command_queue.h
+@@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_finish(cl_command_queue queue);
+ extern cl_int cl_command_queue_wait_flush(cl_command_queue queue);
+ /* Note: Must call this function with queue's lock. */
+ extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num);
++/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */
++static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){
++  return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */
++    || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */
++}
+ 
+ #endif /* __CL_COMMAND_QUEUE_H__ */
+ 
+--- a/src/cl_command_queue_enqueue.c
++++ b/src/cl_command_queue_enqueue.c
+@@ -65,6 +65,8 @@ worker_thread_function(void *Arg)
+       if (cl_event_is_ready(e) <= CL_COMPLETE) {
+         list_node_del(&e->enqueue_node);
+         list_add_tail(&ready_list, &e->enqueue_node);
++      } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){
++        break; /* in in-order mode, can't skip over non-ready events */
+       }
+     }
+ 
+@@ -80,18 +82,20 @@ worker_thread_function(void *Arg)
+     CL_OBJECT_UNLOCK(queue);
+ 
+     /* Do the really job without lock.*/
+-    exec_status = CL_SUBMITTED;
+-    list_for_each_safe(pos, n, &ready_list)
+-    {
+-      e = list_entry(pos, _cl_event, enqueue_node);
+-      cl_event_exec(e, exec_status, CL_FALSE);
+-    }
++    if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* in in-order mode, need to get each all the way to CL_COMPLETE before starting the next one */
++      exec_status = CL_SUBMITTED;
++      list_for_each_safe(pos, n, &ready_list)
++      {
++        e = list_entry(pos, _cl_event, enqueue_node);
++        cl_event_exec(e, exec_status, CL_FALSE);
++      }
+ 
+-    /* Notify all waiting for flush. */
+-    CL_OBJECT_LOCK(queue);
+-    worker->in_exec_status = CL_SUBMITTED;
+-    CL_OBJECT_NOTIFY_COND(queue);
+-    CL_OBJECT_UNLOCK(queue);
++      /* Notify all waiting for flush. */
++      CL_OBJECT_LOCK(queue);
++      worker->in_exec_status = CL_SUBMITTED;
++      CL_OBJECT_NOTIFY_COND(queue);
++      CL_OBJECT_UNLOCK(queue);
++    }
+ 
+     list_for_each_safe(pos, n, &ready_list)
+     {
+--- a/src/cl_gl_api.c
++++ b/src/cl_gl_api.c
+@@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue,
+     data = &e->exec_data;
+     data->type = EnqueueReturnSuccesss;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
+@@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue,
+     data = &e->exec_data;
+     data->type = EnqueueReturnSuccesss;
+ 
+-    if (e_status == CL_COMPLETE) {
++    if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) {
+       // Sync mode, no need to queue event.
+       err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+       if (err != CL_SUCCESS) {
diff --git a/beignet-llvm10-support.patch b/beignet-llvm10-support.patch

new file mode 100644 (file)

index 0000000..7b51dd8
--- /dev/null
+++ b/beignet-llvm10-support.patch
@@ -0,0 +1,334 @@
+*Does not work - not in use*
+
+Description: Add LLVM 10 support
+
+Remove -std=c++0x, as LLVM 10 requires at least c++14 (the default)
+Note that this triggers a gcc bug, so build with clang
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93299
+
+BasicBlockPass no longer exists; as they suggest, replace it with
+FunctionPass with a loop over BasicBlocks
+https://github.com/llvm/llvm-project/commit/9f0ff0b2634bab6a5be8dace005c9eb24d386dd1#diff-bddbe5e4c647cb67298584000b67dea1
+Return true from IntrinsicLoweringPass as it can modify its input
+(possibly a bug before?)
+
+setAlignment now takes a MaybeAlign not a uint
+
+Don't call initializeDominatorTreeWrapperPassPass and
+initializeLoopInfoWrapperPassPass, as they no longer exist
+
+Add explicit template initialization to avoid an undefined symbol
+
+###does not work###
+Pass clang libs as a single string to prevent them being converted
+to -Wl,-Bstatic -lclang... -Wl,-Bdynamic, as that causes a
+multiply defined options crash when run
+(multiple dynamic LLVMs??)
+builtin_acos_float()clang (LLVM option parsing): for the --pgo-warn-misexpect option: may only occur zero or one times!
+
+
+#--- a/CMake/FindLLVM.cmake
+#+++ b/CMake/FindLLVM.cmake
+#@@ -120,11 +120,14 @@ macro(add_one_lib name)
+# endif (LLVM_SYSTEM_LIBS_ORIG)
+# endif (LLVM_VERSION_NODOT VERSION_GREATER 34)
+# 
+#+#something harmless because whitespace at start is an error
+#+set(CLANG_LIBRARIES "-ldl")
+# macro(add_one_lib name)
+#   FIND_LIBRARY(CLANG_LIB
+#     NAMES ${name}
+#     PATHS ${LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
+#-  set(CLANG_LIBRARIES ${CLANG_LIBRARIES} ${CLANG_LIB})
+#+  set(CLANG_LIBRARIES "${CLANG_LIBRARIES} ${CLANG_LIB}")
+#+  message(STATUS "clanglibs name ${name} this ${CLANG_LIB} all ${CLANG_LIBRARIES} ")
+#      unset(CLANG_LIB CACHE)
+# endmacro()
+# 
+#--- a/CMakeLists.txt
+#+++ b/CMakeLists.txt
+#@@ -79,7 +78,7 @@ elseif (COMPILER STREQUAL "CLANG")
+# elseif (COMPILER STREQUAL "ICC")
+#   set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS}  -wd2928 -Wall -fPIC -fstrict-aliasing -fp-model fast -msse4.1 -Wl,-E")
+# endif ()
+#-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -std=c++0x -Wno-invalid-offsetof")
+#+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -Wno-invalid-offsetof")
+# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}")
+# set (CMAKE_CXX_FLAGS_DEBUG          "-O0 -g -DGBE_DEBUG=1")
+# set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1")
+--- beignet-1.3.2.orig/backend/src/backend/program.cpp
++++ beignet-1.3.2/backend/src/backend/program.cpp
+@@ -695,8 +695,12 @@ namespace gbe {
+                 );
+ 
+     clang::CompilerInvocation::CreateFromArgs(*CI,
++#if LLVM_VERSION_MAJOR < 10
+                                               &args[0],
+                                               &args[0] + args.size(),
++#else
++                                              clang::ArrayRef<const char*>(args),
++#endif
+                                               Diags);
+     // Create the compiler instance
+     clang::CompilerInstance Clang;
+@@ -1248,8 +1252,12 @@ EXTEND_QUOTE:
+       // Create the compiler invocation
+       std::unique_ptr<clang::CompilerInvocation> CI(new clang::CompilerInvocation);
+       return clang::CompilerInvocation::CreateFromArgs(*CI,
++#if LLVM_VERSION_MAJOR < 10
+                                                        &args[0],
+                                                        &args[0] + args.size(),
++#else
++                                                       clang::ArrayRef<const char*>(args),
++#endif
+                                                        Diags);
+     }
+ #endif
+--- beignet-1.3.2.orig/backend/src/llvm/llvm_gen_backend.hpp
++++ beignet-1.3.2/backend/src/llvm/llvm_gen_backend.hpp
+@@ -130,10 +130,10 @@ namespace gbe
+   llvm::FunctionPass *createGenPass(ir::Unit &unit);
+ 
+   /*! Remove the GEP instructions */
+-  llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit);
++  llvm::FunctionPass *createRemoveGEPPass(const ir::Unit &unit);
+ 
+   /*! Merge load/store if possible */
+-  llvm::BasicBlockPass *createLoadStoreOptimizationPass();
++  llvm::FunctionPass *createLoadStoreOptimizationPass();
+ 
+   /*! Scalarize all vector op instructions */
+   llvm::FunctionPass* createScalarizePass();
+@@ -141,7 +141,7 @@ namespace gbe
+   llvm::ModulePass* createBarrierNodupPass(bool);
+ 
+   /*! Convert the Intrinsic call to gen function */
+-  llvm::BasicBlockPass *createIntrinsicLoweringPass();
++  llvm::FunctionPass *createIntrinsicLoweringPass();
+ 
+   /*! Passer the printf function call. */
+   llvm::FunctionPass* createPrintfParserPass(ir::Unit &unit);
+--- beignet-1.3.2.orig/backend/src/llvm/llvm_intrinsic_lowering.cpp
++++ beignet-1.3.2/backend/src/llvm/llvm_intrinsic_lowering.cpp
+@@ -29,12 +29,12 @@
+ using namespace llvm;
+ 
+ namespace gbe {
+-    class InstrinsicLowering : public BasicBlockPass
++    class InstrinsicLowering : public FunctionPass
+     {
+     public:
+       static char ID;
+       InstrinsicLowering() :
+-        BasicBlockPass(ID) {}
++        FunctionPass(ID) {}
+ 
+       void getAnalysisUsage(AnalysisUsage &AU) const {
+ 
+@@ -93,9 +93,9 @@ namespace gbe {
+         CI->eraseFromParent();
+         return NewCI;
+       }
+-      virtual bool runOnBasicBlock(BasicBlock &BB)
++      virtual bool runOnFunction(Function &F)
+       {
+-        bool changedBlock = false;
++        for (BasicBlock &BB : F) {
+         Module *M = BB.getParent()->getParent();
+ 
+         DataLayout TD(M);
+@@ -159,13 +159,14 @@ namespace gbe {
+             }
+           }
+         }
+-        return changedBlock;
++        }
++        return true;
+       }
+     };
+ 
+     char InstrinsicLowering::ID = 0;
+ 
+-    BasicBlockPass *createIntrinsicLoweringPass() {
++    FunctionPass *createIntrinsicLoweringPass() {
+       return new InstrinsicLowering();
+     }
+ } // end namespace
+--- beignet-1.3.2.orig/backend/src/llvm/llvm_loadstore_optimization.cpp
++++ beignet-1.3.2/backend/src/llvm/llvm_loadstore_optimization.cpp
+@@ -26,13 +26,13 @@
+ 
+ using namespace llvm;
+ namespace gbe {
+-  class GenLoadStoreOptimization : public BasicBlockPass {
++  class GenLoadStoreOptimization : public FunctionPass {
+ 
+   public:
+     static char ID;
+     ScalarEvolution *SE;
+     const DataLayout *TD;
+-    GenLoadStoreOptimization() : BasicBlockPass(ID) {}
++    GenLoadStoreOptimization() : FunctionPass(ID) {}
+ 
+     void getAnalysisUsage(AnalysisUsage &AU) const {
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
+@@ -45,7 +45,9 @@ namespace gbe {
+       AU.setPreservesCFG();
+     }
+ 
+-    virtual bool runOnBasicBlock(BasicBlock &BB) {
++    virtual bool runOnFunction(Function &F) {
++        bool changedAnyBlock = false;
++        for (BasicBlock &BB : F) {
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
+       SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ #else
+@@ -59,7 +61,9 @@ namespace gbe {
+       #else
+         TD = getAnalysisIfAvailable<DataLayout>();
+       #endif
+-      return optimizeLoadStore(BB);
++           changedAnyBlock = optimizeLoadStore(BB) | changedAnyBlock;
++        }
++        return changedAnyBlock;
+     }
+     Type    *getValueType(Value *insn);
+     Value   *getPointerOperand(Value *I);
+@@ -148,7 +152,11 @@ namespace gbe {
+       values.push_back(merged[i]);
+     }
+     LoadInst *ld = cast<LoadInst>(merged[0]);
++#if LLVM_VERSION_MAJOR < 10
+     unsigned align = ld->getAlignment();
++#else
++    MaybeAlign align = ld->getAlign();
++#endif
+     unsigned addrSpace = ld->getPointerAddressSpace();
+     // insert before first load
+     Builder.SetInsertPoint(ld);
+@@ -231,7 +239,11 @@ namespace gbe {
+ 
+     unsigned addrSpace = st->getPointerAddressSpace();
+ 
++#if LLVM_VERSION_MAJOR < 10
+     unsigned align = st->getAlignment();
++#else
++    MaybeAlign align = st->getAlign();
++#endif
+     // insert before the last store
+     Builder.SetInsertPoint(merged[size-1]);
+ 
+@@ -325,7 +337,7 @@ namespace gbe {
+     return changed;
+   }
+ 
+-  BasicBlockPass *createLoadStoreOptimizationPass() {
++  FunctionPass *createLoadStoreOptimizationPass() {
+     return new GenLoadStoreOptimization();
+   }
+ };
+--- beignet-1.3.2.orig/backend/src/llvm/llvm_passes.cpp
++++ beignet-1.3.2/backend/src/llvm/llvm_passes.cpp
+@@ -37,7 +37,7 @@
+ #include "sys/map.hpp"
+ 
+ using namespace llvm;
+-
++template class cfg::Update<BasicBlock *>;
+ namespace gbe
+ {
+   bool isKernelFunction(const llvm::Function &F) {
+@@ -219,13 +219,13 @@ namespace gbe
+     return offset;
+   }
+ 
+-  class GenRemoveGEPPasss : public BasicBlockPass
++  class GenRemoveGEPPasss : public FunctionPass
+   {
+ 
+    public:
+     static char ID;
+     GenRemoveGEPPasss(const ir::Unit &unit) :
+-      BasicBlockPass(ID),
++      FunctionPass(ID),
+       unit(unit) {}
+     const ir::Unit &unit;
+     void getAnalysisUsage(AnalysisUsage &AU) const {
+@@ -242,16 +242,18 @@ namespace gbe
+ 
+     bool simplifyGEPInstructions(GetElementPtrInst* GEPInst);
+ 
+-    virtual bool runOnBasicBlock(BasicBlock &BB)
++    virtual bool runOnFunction(Function &F)
+     {
+-      bool changedBlock = false;
++      bool changedAnyBlock = false;
++        for (BasicBlock &BB : F) {
+       iplist<Instruction>::iterator I = BB.getInstList().begin();
+       for (auto nextI = I, E = --BB.getInstList().end(); I != E; I = nextI) {
+         iplist<Instruction>::iterator I = nextI++;
+         if(GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(&*I))
+-          changedBlock = (simplifyGEPInstructions(gep) || changedBlock);
++          changedAnyBlock = (simplifyGEPInstructions(gep) | changedAnyBlock);
+       }
+-      return changedBlock;
++        }
++      return changedAnyBlock;
+     }
+   };
+ 
+@@ -367,7 +369,7 @@ namespace gbe
+     return true;
+   }
+ 
+-  BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit) {
++  FunctionPass *createRemoveGEPPass(const ir::Unit &unit) {
+     return new GenRemoveGEPPasss(unit);
+   }
+ } /* namespace gbe */
+--- beignet-1.3.2.orig/backend/src/llvm/llvm_sampler_fix.cpp
++++ beignet-1.3.2/backend/src/llvm/llvm_sampler_fix.cpp
+@@ -33,11 +33,13 @@ namespace gbe {
+   class SamplerFix : public FunctionPass {
+   public:
+     SamplerFix() : FunctionPass(ID) {
++#if LLVM_VERSION_MAJOR < 10
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 35
+       initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+ #else
+       initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ #endif
++#endif
+     }
+ 
+     bool visitCallInst(CallInst *I) {
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -575,10 +575,12 @@ namespace gbe
+         has_errors(false),
+         legacyMode(true)
+     {
++#if LLVM_VERSION_MAJOR < 10
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37
+       initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+ #else
+       initializeLoopInfoPass(*PassRegistry::getPassRegistry());
++#endif
+ #endif
+       pass = PASS_EMIT_REGISTERS;
+     }
+--- a/backend/src/llvm/llvm_scalarize.cpp
++++ b/backend/src/llvm/llvm_scalarize.cpp
+@@ -96,10 +96,12 @@ namespace gbe {
+ 
+     Scalarize() : FunctionPass(ID)
+     {
++#if LLVM_VERSION_MAJOR < 10
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 35
+       initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+ #else
+       initializeDominatorTreePass(*PassRegistry::getPassRegistry());
++#endif
+ #endif
+     }
+ 
diff --git a/beignet-llvm6-support.patch b/beignet-llvm6-support.patch

new file mode 100644 (file)

index 0000000..07e89f7
--- /dev/null
+++ b/beignet-llvm6-support.patch
@@ -0,0 +1,37 @@
+Description: Support LLVM 6
+
+LLVMContext::setDiagnosticHandler is renamed
+LoopInfo::markAsRemoved is partly replaced by LoopInfo::erase,
+but that doesn't remove the loop from the queue
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009211.html (original), https://lists.freedesktop.org/archives/beignet/2019-January/009222.html (#913141 fix)
+
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -322,7 +322,11 @@ namespace gbe
+     DataLayout DL(&mod);
+     
+     gbeDiagnosticContext dc;
++#if LLVM_VERSION_MAJOR >= 6
++    mod.getContext().setDiagnosticHandlerCallBack(&gbeDiagnosticHandler,&dc);
++#else
+     mod.getContext().setDiagnosticHandler(&gbeDiagnosticHandler,&dc);
++#endif
+ 
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37
+     mod.setDataLayout(DL);
+--- a/backend/src/llvm/llvm_unroll.cpp
++++ b/backend/src/llvm/llvm_unroll.cpp
+@@ -205,7 +205,10 @@ namespace gbe {
+           if (parentTripCount != 0 && currTripCount * parentTripCount > 32) {
+             //Don't change the unrollID if doesn't force unroll.
+             //setUnrollID(parentL, false);
+-#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
++#if LLVM_VERSION_MAJOR >= 6
++            LPM.markLoopAsDeleted(*parentL);
++            loopInfo.erase(parentL);
++#elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38
+             loopInfo.markAsRemoved(parentL);
+ #else
+             LPM.deleteLoopFromQueue(parentL);
diff --git a/beignet-llvm7-support.patch b/beignet-llvm7-support.patch

new file mode 100644 (file)

index 0000000..9d460ce
--- /dev/null
+++ b/beignet-llvm7-support.patch
@@ -0,0 +1,106 @@
+Description: Add LLVM 7 support
+
+1.Change linking order, as clangCodeGen now links to clangFrontend
+2.Pass references not pointers to WriteBitcodeToFile and CloneModule
+3.Add the headers that LoopSimplifyID, LCSSAID and
+some create*Pass have moved to
+4.Define our DEBUG whether or not we just undefined LLVM's
+(theirs is now LLVM_DEBUG, but we never actually use it)
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Bug-Debian: https://bugs.debian.org/912787
+Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009212.html
+
+--- a/CMake/FindLLVM.cmake
++++ b/CMake/FindLLVM.cmake
+@@ -113,10 +113,10 @@ macro(add_one_lib name)
+ endmacro()
+ 
+ #Assume clang lib path same as llvm lib path
++add_one_lib("clangCodeGen")
+ add_one_lib("clangFrontend")
+ add_one_lib("clangSerialization")
+ add_one_lib("clangDriver")
+-add_one_lib("clangCodeGen")
+ add_one_lib("clangSema")
+ add_one_lib("clangStaticAnalyzerFrontend")
+ add_one_lib("clangStaticAnalyzerCheckers")
+--- a/backend/src/backend/gen_program.cpp
++++ b/backend/src/backend/gen_program.cpp
+@@ -449,7 +449,11 @@ namespace gbe {
+ #ifdef GBE_COMPILER_AVAILABLE
+       std::string str;
+       llvm::raw_string_ostream OS(str);
++#if LLVM_VERSION_MAJOR >= 7
++      llvm::WriteBitcodeToFile(*((llvm::Module*)prog->module), OS);
++#else
+       llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS);
++#endif
+       std::string& bin_str = OS.str();
+       int llsz = bin_str.size();
+       *binary = (char *)malloc(sizeof(char) * (llsz+1) );
+@@ -540,7 +544,11 @@ namespace gbe {
+                                     &modRef);
+         src = llvm::unwrap(modRef);
+       }
++#if LLVM_VERSION_MAJOR >= 7
++      llvm::Module* clone = llvm::CloneModule(*src).release();
++#else
+       llvm::Module* clone = llvm::CloneModule(src).release();
++#endif
+       if (LLVMLinkModules2(wrap(dst), wrap(clone))) {
+ #elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37
+       if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) {
+--- a/backend/src/backend/program.cpp
++++ b/backend/src/backend/program.cpp
+@@ -794,7 +794,11 @@ namespace gbe {
+       llvm::raw_fd_ostream ostream (dumpSPIRBinaryName.c_str(),
+                                     err, llvm::sys::fs::F_None);
+       if (!err)
++#if LLVM_VERSION_MAJOR<7
+         llvm::WriteBitcodeToFile(*out_module, ostream);
++#else
++        llvm::WriteBitcodeToFile(**out_module, ostream);
++#endif
+     }
+ #endif
+     return true;
+--- a/backend/src/llvm/llvm_bitcode_link.cpp
++++ b/backend/src/llvm/llvm_bitcode_link.cpp
+@@ -340,7 +340,11 @@ namespace gbe
+     /* We use beignet's bitcode as dst because it will have a lot of
+        lazy functions which will not be loaded. */
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
++#if LLVM_VERSION_MAJOR >= 7
++    llvm::Module * linked_module = llvm::CloneModule(*(llvm::Module*)mod).release();
++#else
+     llvm::Module * linked_module = llvm::CloneModule((llvm::Module*)mod).release();
++#endif
+     if(LLVMLinkModules2(wrap(clonedLib), wrap(linked_module))) {
+ #else
+     char* errorMsg;
+--- a/backend/src/llvm/llvm_includes.hpp
++++ b/backend/src/llvm/llvm_includes.hpp
+@@ -89,6 +89,10 @@
+ #include "llvm/CodeGen/IntrinsicLowering.h"
+ 
+ #include "llvm/Transforms/Scalar.h"
++#if LLVM_VERSION_MAJOR >= 7
++#include "llvm/Transforms/Utils.h"
++#include "llvm/Transforms/InstCombine/InstCombine.h"
++#endif
+ #include "llvm/MC/MCAsmInfo.h"
+ #include "llvm/MC/MCContext.h"
+ #include "llvm/MC/MCInstrInfo.h"
+--- a/backend/src/llvm/ExpandLargeIntegers.cpp
++++ b/backend/src/llvm/ExpandLargeIntegers.cpp
+@@ -99,8 +99,8 @@ using namespace llvm;
+ 
+ #ifdef DEBUG
+   #undef DEBUG
+-  #define DEBUG(...)
+ #endif
++#define DEBUG(...)
+ // Break instructions up into no larger than 64-bit chunks.
+ static const unsigned kChunkBits = 64;
+ static const unsigned kChunkBytes = kChunkBits / CHAR_BIT;
diff --git a/beignet-llvm8-support.patch b/beignet-llvm8-support.patch

new file mode 100644 (file)

index 0000000..b17a180
--- /dev/null
+++ b/beignet-llvm8-support.patch
@@ -0,0 +1,71 @@
+Description: Fix build with LLVM/Clang 8
+
+Origin: (partly) FreeBSD https://svnweb.freebsd.org/ports/head/lang/beignet/files/patch-llvm8?view=markup
+Author: Jan Beich, Rebecca N. Palmer
+
+--- a/backend/src/CMakeLists.txt
++++ b/backend/src/CMakeLists.txt
+@@ -168,6 +168,7 @@ add_dependencies(gbe beignet_bitcode)
+ endif (NOT (USE_STANDALONE_GBE_COMPILER STREQUAL "true"))
+ 
+ add_library(gbeinterp SHARED gbe_bin_interpreter.cpp)
++target_link_libraries(gbeinterp ${LLVM_MODULE_LIBS} ${LLVM_SYSTEM_LIBS})
+ 
+ if (LLVM_VERSION_NODOT VERSION_EQUAL 34)
+   find_library(TERMINFO NAMES tinfo ncurses)
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -3073,14 +3073,22 @@ namespace gbe
+ 
+ 
+   static unsigned getChildNo(BasicBlock *bb) {
++#if LLVM_VERSION_MAJOR < 8
+     TerminatorInst *term = bb->getTerminator();
++#else
++    Instruction *term = bb->getTerminator();
++#endif
+     return term->getNumSuccessors();
+   }
+ 
+   // return NULL if index out-range of children number
+   static BasicBlock *getChildPossible(BasicBlock *bb, unsigned index) {
+ 
++#if LLVM_VERSION_MAJOR < 8
+     TerminatorInst *term = bb->getTerminator();
++#else
++    Instruction *term = bb->getTerminator();
++#endif
+     unsigned childNo = term->getNumSuccessors();
+     BasicBlock *child = NULL;
+     if(index < childNo) {
+--- a/backend/src/backend/gen_register.hpp
++++ b/backend/src/backend/gen_register.hpp
+@@ -225,6 +225,7 @@ namespace gbe
+                        uint32_t width,
+                        uint32_t hstride)
+     {
++      this->value.reg = 0;//avoid subgroup crash
+       this->type = type;
+       this->file = file;
+       this->nr = nr;
+--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
++++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl
+@@ -216,13 +216,14 @@ OVERLOADABLE ulong mad_sat(ulong a, ulon
+   return __gen_ocl_mad_sat(a, b, c);
+ }
+ 
+-OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); }
++// the 'volatile' is to make the LLVM optimizer leave these alone, as it would convert them to intrinsics (fshl/fshr) that we don't implement
++OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { volatile uchar z; z = (x << y); return z | (x >> (8 - y)); }
+ OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); }
+-OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); }
++OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { volatile ushort z; z = (x << y); return z | (x >> (16 - y)); }
+ OVERLOADABLE short __rotate_left(short x, short y) { return __rotate_left((ushort)x, (ushort)y); }
+-OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 - y)); }
++OVERLOADABLE uint __rotate_left(uint x, uint y) { volatile uint z; z = (x << y); return z | (x >> (32 - y)); }
+ OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, (uint)y); }
+-OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> (64 - y)); }
++OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { volatile ulong z; z = (x << y); return z | (x >> (64 - y)); }
+ OVERLOADABLE long __rotate_left(long x, long y) { return __rotate_left((ulong)x, (ulong)y); }
+ #define DEF(type, m) OVERLOADABLE type rotate(type x, type y) { return __rotate_left(x, (type)(y & m)); }
+ DEF(char, 7)
diff --git a/beignet-llvm9-support.patch b/beignet-llvm9-support.patch

new file mode 100644 (file)

index 0000000..5bcb516
--- /dev/null
+++ b/beignet-llvm9-support.patch
@@ -0,0 +1,113 @@
+Description: Fix build with LLVM/Clang 9
+
+Origin: (mostly) FreeBSD https://svnweb.freebsd.org/ports/head/lang/beignet/files/patch-llvm9?view=markup
+Author: Jan Beich, Rebecca N. Palmer
+
+--- a/CMake/FindLLVM.cmake
++++ b/CMake/FindLLVM.cmake
+@@ -126,6 +126,9 @@ macro(add_one_lib name)
+ add_one_lib("clangStaticAnalyzerCore")
+ add_one_lib("clangAnalysis")
+ add_one_lib("clangEdit")
++if (LLVM_VERSION_NODOT VERSION_GREATER 80)
++add_one_lib("clangASTMatchers")
++endif (LLVM_VERSION_NODOT VERSION_GREATER 80)
+ add_one_lib("clangAST")
+ add_one_lib("clangParse")
+ add_one_lib("clangSema")
+--- a/backend/src/llvm/llvm_intrinsic_lowering.cpp
++++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp
+@@ -77,7 +77,11 @@ namespace gbe {
+         std::vector<Type *> ParamTys;
+         for (Value** I = ArgBegin; I != ArgEnd; ++I)
+           ParamTys.push_back((*I)->getType());
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++        FunctionCallee FCache = M->getOrInsertFunction(NewFn,
++#else
+         Constant* FCache = M->getOrInsertFunction(NewFn,
++#endif
+                                         FunctionType::get(RetTy, ParamTys, false));
+ 
+         IRBuilder<> Builder(CI->getParent(), BasicBlock::iterator(CI));
+--- a/backend/src/llvm/llvm_sampler_fix.cpp
++++ b/backend/src/llvm/llvm_sampler_fix.cpp
+@@ -82,7 +82,11 @@ namespace gbe {
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+           Module *M = I->getParent()->getParent()->getParent();
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++          FunctionCallee samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType());
++#else
+           Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType());
++#endif
+ #else
+           Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr);
+ #endif
+@@ -124,7 +128,11 @@ namespace gbe {
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+           Module *M = I->getParent()->getParent()->getParent();
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++          FunctionCallee samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType());
++#else
+           Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType());
++#endif
+ #else
+           Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr);
+ #endif
+--- a/backend/src/llvm/llvm_profiling.cpp
++++ b/backend/src/llvm/llvm_profiling.cpp
+@@ -163,10 +163,18 @@ namespace gbe
+       // __gen_ocl_store_timestamp(int nth, int type);
+       Value *Args[2] = {ConstantInt::get(intTy, pointNum++), ConstantInt::get(intTy, profilingType)};
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++      builder->CreateCall(module->getOrInsertFunction(
++#else
+       builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
++#endif
+               "__gen_ocl_calc_timestamp", Type::getVoidTy(module->getContext()),
+               IntegerType::getInt32Ty(module->getContext()),
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++              IntegerType::getInt32Ty(module->getContext())),
++#else
+               IntegerType::getInt32Ty(module->getContext()))),
++#endif
+               ArrayRef<Value*>(Args));
+ #else
+       builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+@@ -185,10 +193,18 @@ namespace gbe
+     Value *Args2[2] = {profilingBuf, ConstantInt::get(intTy, profilingType)};
+ 
+ #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++    builder->CreateCall(module->getOrInsertFunction(
++#else
+     builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
++#endif
+             "__gen_ocl_store_profiling", Type::getVoidTy(module->getContext()),
+             ptrTy,
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++            IntegerType::getInt32Ty(module->getContext())),
++#else
+             IntegerType::getInt32Ty(module->getContext()))),
++#endif
+             ArrayRef<Value*>(Args2));
+ #else
+     builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+--- a/backend/src/llvm/llvm_device_enqueue.cpp
++++ b/backend/src/llvm/llvm_device_enqueue.cpp
+@@ -398,8 +398,13 @@ namespace gbe {
+               std::vector<Type *> ParamTys;
+               for (Value** iter = args.begin(); iter != args.end(); ++iter)
+                 ParamTys.push_back((*iter)->getType());
++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90
++              CallInst* newCI = builder.CreateCall(mod->getOrInsertFunction(
++                              "__gen_enqueue_kernel_slm", FunctionType::get(intTy, ParamTys, false)), args);
++#else
+               CallInst* newCI = builder.CreateCall(cast<llvm::Function>(mod->getOrInsertFunction(
+                               "__gen_enqueue_kernel_slm", FunctionType::get(intTy, ParamTys, false))), args);
++#endif
+               CI->replaceAllUsesWith(newCI);
+               deadInsnSet.insert(CI);
+             }
diff --git a/beignet-reduce-notfound-output.patch b/beignet-reduce-notfound-output.patch

new file mode 100644 (file)

index 0000000..93924c9
--- /dev/null
+++ b/beignet-reduce-notfound-output.patch
@@ -0,0 +1,41 @@
+Description: Reduce error spew on unsupported hardware
+
+Loading beignet on unsupported hardware produces ~20 errors, even
+if another ICD is also installed that does support the hardware.
+Replace these with one message that explicitly says what to do.
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -797,10 +797,10 @@ glk12eu_break:
+     case PCI_CHIP_SANDYBRIDGE_BRIDGE_S:
+     case PCI_CHIP_SANDYBRIDGE_S_GT:
+       // Intel(R) HD Graphics SandyBridge not supported yet
++    default:
++      fprintf(stderr, "beignet-opencl-icd: no supported GPU found, this is probably the wrong opencl-icd package for this hardware\n(If you have multiple ICDs installed and OpenCL works, you can ignore this message)\n");
+       ret = NULL;
+       break;
+-    default:
+-      printf("cl_get_gt_device(): error, unknown device: %x\n", device_id);
+   }
+ 
+   if (ret == NULL)
+--- a/src/intel/intel_driver.c
++++ b/src/intel/intel_driver.c
+@@ -263,7 +263,6 @@ if(!intel_driver_is_active(intel)) {
+ }
+ 
+ if(!intel_driver_is_active(intel)) {
+-  fprintf(stderr, "Device open failed, aborting...\n");
+   return CL_DEVICE_NOT_FOUND;
+ }
+ 
+@@ -324,7 +323,6 @@ drm_client_t client;
+ // usually dev_name = "/dev/dri/card%d"
+ dev_fd = open(dev_name, O_RDWR);
+ if (dev_fd == -1) {
+-  fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno));
+   return 0;
+ }
+ 
diff --git a/beignet-reduce-notfound-output2.patch b/beignet-reduce-notfound-output2.patch

new file mode 100644 (file)

index 0000000..eb63b91
--- /dev/null
+++ b/beignet-reduce-notfound-output2.patch
@@ -0,0 +1,61 @@
+Description: Reduce error spew on unsupported or hybrid hardware
+
+Explicitly check if the device is i915 before calling random ioctl()s
+to avoid triggering pointless user-visible error messages if it is not.
+
+Origin: upstream b70d65ba25a32a965cc122bf944ba14a1aa0a095
+Author: Mark Thompson
+
+--- a/src/intel/intel_driver.c
++++ b/src/intel/intel_driver.c
+@@ -312,6 +312,26 @@ return ret;
+ }
+ #endif
+ 
++static int
++intel_driver_check_device(int dev_fd)
++{
++  // Ensure that this is actually an i915 DRM device.
++  drmVersion *version;
++  int ret;
++  version = drmGetVersion(dev_fd);
++  if (!version) {
++    fprintf(stderr, "drmGetVersion(%d) failed: %s\n", dev_fd, strerror(errno));
++    close(dev_fd);
++    return 0;
++  }
++  ret = !strcmp(version->name, "i915");
++  drmFreeVersion(version);
++  // Don't print an error here if this device is using a different driver,
++  // because we might be iterating over multiple devices looking for a
++  // compatible one.
++  return ret;
++}
++
+ LOCAL int
+ intel_driver_init_master(intel_driver_t *driver, const char* dev_name)
+ {
+@@ -326,6 +346,11 @@ if (dev_fd == -1) {
+   return 0;
+ }
+ 
++if (!intel_driver_check_device(dev_fd)) {
++  close(dev_fd);
++  return 0;
++}
++
+ // Check that we're authenticated
+ memset(&client, 0, sizeof(drm_client_t));
+ ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client);
+@@ -356,6 +381,11 @@ dev_fd = open(dev_name, O_RDWR);
+ if (dev_fd == -1)
+   return 0;
+ 
++if (!intel_driver_check_device(dev_fd)) {
++  close(dev_fd);
++  return 0;
++}
++
+ ret = intel_driver_init(driver, dev_fd);
+ driver->need_close = 1;
+ 
diff --git a/beignet-update-docs.patch b/beignet-update-docs.patch

new file mode 100644 (file)

index 0000000..735bea2
--- /dev/null
+++ b/beignet-update-docs.patch
@@ -0,0 +1,161 @@
+Description: Update documentation
+
+Reflect #767148 fix, high precision now being default, and releases.
+Clarify what hardware is supported.
+Add Debian specific information; recommend the Debian BTS rather than
+mostly-inactive upstream contact points.
+
+Author: Rebecca N. Palmer <rebecca_palmer@zoho.com>
+Forwarded: partially, most recently https://lists.freedesktop.org/archives/beignet/2019-January/009225.html
+
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -907,7 +907,7 @@ cl_self_test(cl_device_id device, cl_sel
+                     } else {
+                       ret = SELF_TEST_SLM_FAIL;
+                       printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n"
+-                             "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
++                             "This can usually be fixed by upgrading Linux to >= 4.2,\nsee /usr/share/doc/beignet-dev/Beignet.html or https://www.freedesktop.org/wiki/Software/Beignet/\n",
+                              test_data[0], test_data[1], test_data[2]);
+ 
+                     }
+--- a/docs/Beignet/Backend.mdwn
++++ b/docs/Beignet/Backend.mdwn
+@@ -37,9 +37,7 @@ Environment variables are used all over
+   precision math instructions compliant with OpenCL Spec. So we provide a
+   software version to meet the high precision requirement. Obviously the
+   software version's performance is not as good as native version supported by
+-  GEN hardware. What's more, most graphics application don't need this high
+-  precision, so we choose 0 as the default value. So OpenCL apps do not suffer
+-  the performance penalty for using high precision math functions.
++  GEN hardware.
+ 
+ - `OCL_SIMD_WIDTH` `(8 or 16)`. Select the number of lanes per hardware thread,
+   Normally, you don't need to set it, we will select suitable simd width for
+--- /dev/null
++++ b/docs/index.mdwn
+@@ -0,0 +1 @@
++[[!map pages="* and !ikiwiki/*" show=title]]
+--- a/docs/Beignet.mdwn
++++ b/docs/Beignet.mdwn
+@@ -16,6 +16,8 @@ News
+ Prerequisite
+ ------------
+ 
++(for building the upstream source; Debian packages handle this automatically)
++
+ The project depends on the following external libraries:
+ 
+ - libdrm libraries (libdrm and libdrm\_intel)
+@@ -84,7 +86,7 @@ you need to configure it as below:
+ 
+ CMake will check the dependencies and will complain if it does not find them.
+ 
+-`> make`
++`> make utest`
+ 
+ The cmake will build the backend firstly. Please refer to:
+ [[OpenCL Gen Backend|Beignet/Backend]] to get more dependencies.
+@@ -123,25 +125,17 @@ platform. Beignet also produces various
+ consistency. This small test framework uses a simple c++ registration system to
+ register all the unit tests.
+ 
+-You need to call setenv.sh in the utests/ directory to set some environment variables
+-firstly as below:
+-
+-`> . setenv.sh`
+-
+-Then in `utests/`:
++In Debian beignet, the testing tool is in the _beignet-dev_ package, and is run with:
+ 
+-`> ./utest_run`
++`> /usr/lib/\`dpkg-architecture -qDEB_HOST_MULTIARCH\`/beignet/utest_run`
+ 
+-will run all the unit tests one after the others
++(this name and path may change in future releases - please do not rely on it).  It will test the first OpenCL GPU device it finds (which does _not_ have to be a Beignet device).  If you want to test a different device, uninstall all other ICDs.  To see more options, pass `-h`.
+ 
+-`> ./utest_run some_unit_test`
++If you compiled Beignet yourself, you will find this tool in `utests`, and will need to set some environment variables to use it (see `setenv.sh`).
+ 
+-will only run `some_unit_test` test.
+ 
+ On all supported target platform, the pass rate should be 100%. If it is not, you may
+-need to refer the "Known Issues" section. Please be noted, the `. setenv.sh` is only
+-required to run unit test cases. For all other OpenCL applications, don't execute that
+-command.
++need to refer the "Known Issues" section.
+ 
+ Normally, beignet needs to run under X server environment as normal user. If there isn't X server,
+ beignet provides two alternative to run:
+@@ -151,14 +145,23 @@ beignet provides two alternative to run:
+ Supported Targets
+ -----------------
+ 
++Beignet aims to support Gen7 to Gen9 Intel HD (not PowerVR) integrated GPUs.  It is the only OpenCL in Debian for the GPUs of:
++
+  * 3rd Generation Intel Core Processors "Ivybridge".
+  * 3rd Generation Intel Atom Processors "BayTrail".
+  * 4th Generation Intel Core Processors "Haswell", need kernel patch if your linux kernel older than 4.2, see the "Known Issues" section.
++
++Beignet should also work on the following, but if CL-GL sharing is not required, [Intel Compute Runtime (Neo)](https://01.org/compute-runtime) (Debian package [intel-opencl-icd](https://packages.debian.org/search?keywords=intel-opencl-icd&searchon=names&exact=1&suite=all&section=all)) may be better:
++
+  * 5th Generation Intel Core Processors "Broadwell".
+  * 5th Generation Intel Atom Processors "Braswell".
+- * 6th Generation Intel Core Processors "Skylake" and "Kabylake".
+  * 5th Generation Intel Atom Processors "Broxten" or "Apollolake".
++ * 6th+ Generation Intel Core Processors "Skylake", "Kaby Lake", "Coffee Lake", "Comet Lake", "Amber Lake".
+ 
++Not all processor models have an integrated GPU (Beignet does _not_ support running OpenCL on the CPU itself - see [pocl-opencl-icd](https://packages.debian.org/search?keywords=pocl-opencl-icd&searchon=names&exact=1&suite=all&section=all) for that).  Systems that also have a discrete GPU may disable the integrated GPU: check _xrandr --listproviders_ or see [here](https://nouveau.freedesktop.org/wiki/Optimus/).
++
++Attempting to run Beignet on unsupported hardware should return CL_DEVICE_NOT_FOUND; if it does anything else (especially crashing) please [report a bug](#howtocontribute).
++
+ OpenCL 2.0
+ ----------
+ From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later hardware.
+@@ -188,7 +191,7 @@ Known Issues
+   forever until a reboot.
+ 
+ * "Beignet: self-test failed" and almost all unit tests fail.
+-  Linux 3.15 and 3.16 (commits [f0a346b](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f0a346bdafaf6fc4a51df9ddf1548fd888f860d8)
++  Linux 3.15 and upstream 3.16 (_not_ Debian jessie 3.16; commits [f0a346b](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f0a346bdafaf6fc4a51df9ddf1548fd888f860d8)
+   to [c9224fa](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c9224faa59c3071ecfa2d4b24592f4eb61e57069))
+   enable the register whitelist by default but miss some registers needed
+   for Beignet.
+@@ -224,10 +227,15 @@ Known Issues
+   This extension is partially implemented(the most commonly used part), and we will implement
+   other parts based on requirement.
+ 
++* Programs using Intel-specific extensions fail to compile.
++  
++  Debian opencl-c-headers is the standard (Khronos) headers, which splits some of these into separate files
++  (e.g. cl\_ext\_intel.h): search /usr/include/CL.  cl\_intel.h is in the beignet-dev package.
++
+ Project repository
+ ------------------
+ Right now, we host our project on fdo at:
+-[http://cgit.freedesktop.org/beignet/](http://cgit.freedesktop.org/beignet/).  
++[https://gitlab.freedesktop.org/beignet/beignet](https://gitlab.freedesktop.org/beignet/beignet).
+ And the Intel 01.org:
+ [https://01.org/beignet](https://01.org/beignet)
+ 
+@@ -263,15 +271,11 @@ If I missed any other package maintainer
+ 
+ How to contribute
+ -----------------
+-You are always welcome to contribute to this project, just need to subscribe
+-to the beignet mail list and send patches to it for review.
+-The official mail list is as below:
+-[http://lists.freedesktop.org/mailman/listinfo/beignet](http://lists.freedesktop.org/mailman/listinfo/beignet)  
+-The official bugzilla is at:
+-[https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet](https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet)  
+-You are welcome to submit beignet bug. Please be noted, please specify the exact platform
+-information, such as BYT/IVB/HSW/BDW, and GT1/GT2/GT3. You can easily get this information
+-by running the beignet's unit test.
++Please [report bugs to Debian](https://www.debian.org/Bugs/Reporting) package beignet-opencl-icd.
++
++Please specify your hardware when reporting a bug: _reportbug beignet-opencl-icd_ will automatically include this information.
++
++The upstream [email list](http://lists.freedesktop.org/mailman/listinfo/beignet) and [bug tracker](https://gitlab.freedesktop.org/beignet/beignet/issues) are still available, but as upstream is mostly inactive, it may be some time before they respond.
+ 
+ Documents for OpenCL application developers
+ -------------------------------------------
diff --git a/beignet.spec b/beignet.spec

index c62d919ab37637d3ce45435886c4a81816135c01..b486659765edef1f14c174d8cfcd4d539770e19d 100644 (file)
--- a/beignet.spec
+++ b/beignet.spec
@@ -10,6 +10,28 @@ Source0:     https://01.org/sites/default/files/beignet-%{version}-source.tar.gz
  # Source0-md5: a577ab18d67a891c8767b8ea62253543
  Patch0:                cflags.patch
  Patch1:                static_llvm.patch
+# Debian patches
+Patch10:       %{name}-reduce-notfound-output.patch
+Patch11:       %{name}-update-docs.patch
+Patch12:       %{name}-docs-broken-links.patch
+Patch13:       %{name}-cl_accelerator_intel.patch
+Patch14:       %{name}-grammar.patch
+Patch15:       %{name}-clearer-type-errors.patch
+Patch16:       %{name}-debian-885423.patch
+Patch17:       %{name}-disable-wayland-warning.patch
+Patch18:       %{name}-eventchain-memory-leak.patch
+Patch19:       %{name}-llvm6-support.patch
+Patch20:       %{name}-llvm7-support.patch
+Patch21:       %{name}-accept-old-create-queue.patch
+Patch22:       %{name}-reduce-notfound-output2.patch
+Patch23:       %{name}-coffeelake.patch
+Patch24:       %{name}-in-order-queue.patch
+Patch25:       %{name}-accept-ignore--g.patch
+Patch26:       %{name}-llvm8-support.patch
+Patch27:       %{name}-llvm9-support.patch
+Patch28:       %{name}-cometlake.patch
+# modified
+Patch29:       %{name}-llvm10-support.patch
  URL:           https://www.freedesktop.org/wiki/Software/Beignet/
  BuildRequires: EGL-devel
  BuildRequires: Mesa-libgbm-devel
@@ -56,6 +78,26 @@ poleceń, jądra i programów oraz uruchamia je na GPU.
  %setup -qn Beignet-%{version}-Source
  %patch0 -p1
  %patch1 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%patch16 -p1
+%patch17 -p1
+%patch18 -p1
+%patch19 -p1
+%patch20 -p1
+%patch21 -p1
+%patch22 -p1
+%patch23 -p1
+%patch24 -p1
+%patch25 -p1
+%patch26 -p1
+%patch27 -p1
+%patch28 -p1
+%patch29 -p1
  
  # don't lower default -std= on g++ 5+ (recent llvm requires C++14)
  %if "%{_ver_ge '%{cxx_version}' '5.0'}" == "1"
author	Jakub Bogusz <qboosh@pld-linux.org>
	Wed, 19 May 2021 04:35:54 +0000 (06:35 +0200)
committer	Jakub Bogusz <qboosh@pld-linux.org>
	Wed, 19 May 2021 04:36:04 +0000 (06:36 +0200)
beignet-accept-ignore--g.patch	[new file with mode: 0644]	patch \| blob
beignet-accept-old-create-queue.patch	[new file with mode: 0644]	patch \| blob
beignet-cl_accelerator_intel.patch	[new file with mode: 0644]	patch \| blob
beignet-clearer-type-errors.patch	[new file with mode: 0644]	patch \| blob
beignet-coffeelake.patch	[new file with mode: 0644]	patch \| blob
beignet-cometlake.patch	[new file with mode: 0644]	patch \| blob
beignet-debian-885423.patch	[new file with mode: 0644]	patch \| blob
beignet-disable-wayland-warning.patch	[new file with mode: 0644]	patch \| blob
beignet-docs-broken-links.patch	[new file with mode: 0644]	patch \| blob
beignet-eventchain-memory-leak.patch	[new file with mode: 0644]	patch \| blob
beignet-grammar.patch	[new file with mode: 0644]	patch \| blob
beignet-in-order-queue.patch	[new file with mode: 0644]	patch \| blob
beignet-llvm10-support.patch	[new file with mode: 0644]	patch \| blob
beignet-llvm6-support.patch	[new file with mode: 0644]	patch \| blob
beignet-llvm7-support.patch	[new file with mode: 0644]	patch \| blob
beignet-llvm8-support.patch	[new file with mode: 0644]	patch \| blob
beignet-llvm9-support.patch	[new file with mode: 0644]	patch \| blob
beignet-reduce-notfound-output.patch	[new file with mode: 0644]	patch \| blob
beignet-reduce-notfound-output2.patch	[new file with mode: 0644]	patch \| blob
beignet-update-docs.patch	[new file with mode: 0644]	patch \| blob
beignet.spec		patch \| blob \| blame \| history