From 7d3d7cb588297bc8848c9693ae5c5454ac4bfb06 Mon Sep 17 00:00:00 2001 From: Jakub Bogusz Date: Wed, 19 May 2021 06:35:54 +0200 Subject: [PATCH] - merged some Debian patches --- beignet-accept-ignore--g.patch | 21 ++ beignet-accept-old-create-queue.patch | 48 ++++ beignet-cl_accelerator_intel.patch | 147 ++++++++++ beignet-clearer-type-errors.patch | 118 ++++++++ beignet-coffeelake.patch | 228 +++++++++++++++ beignet-cometlake.patch | 289 +++++++++++++++++++ beignet-debian-885423.patch | 83 ++++++ beignet-disable-wayland-warning.patch | 17 ++ beignet-docs-broken-links.patch | 63 +++++ beignet-eventchain-memory-leak.patch | 76 +++++ beignet-grammar.patch | 61 +++++ beignet-in-order-queue.patch | 381 ++++++++++++++++++++++++++ beignet-llvm10-support.patch | 334 ++++++++++++++++++++++ beignet-llvm6-support.patch | 37 +++ beignet-llvm7-support.patch | 106 +++++++ beignet-llvm8-support.patch | 71 +++++ beignet-llvm9-support.patch | 113 ++++++++ beignet-reduce-notfound-output.patch | 41 +++ beignet-reduce-notfound-output2.patch | 61 +++++ beignet-update-docs.patch | 161 +++++++++++ beignet.spec | 42 +++ 21 files changed, 2498 insertions(+) create mode 100644 beignet-accept-ignore--g.patch create mode 100644 beignet-accept-old-create-queue.patch create mode 100644 beignet-cl_accelerator_intel.patch create mode 100644 beignet-clearer-type-errors.patch create mode 100644 beignet-coffeelake.patch create mode 100644 beignet-cometlake.patch create mode 100644 beignet-debian-885423.patch create mode 100644 beignet-disable-wayland-warning.patch create mode 100644 beignet-docs-broken-links.patch create mode 100644 beignet-eventchain-memory-leak.patch create mode 100644 beignet-grammar.patch create mode 100644 beignet-in-order-queue.patch create mode 100644 beignet-llvm10-support.patch create mode 100644 beignet-llvm6-support.patch create mode 100644 beignet-llvm7-support.patch create mode 100644 beignet-llvm8-support.patch create mode 100644 beignet-llvm9-support.patch create mode 100644 beignet-reduce-notfound-output.patch create mode 100644 beignet-reduce-notfound-output2.patch create mode 100644 beignet-update-docs.patch diff --git a/beignet-accept-ignore--g.patch b/beignet-accept-ignore--g.patch new file mode 100644 index 0000000..8a88d96 --- /dev/null +++ b/beignet-accept-ignore--g.patch @@ -0,0 +1,21 @@ +Description: Don't error out when -g is passed + +Author: Rebecca N. Palmer +Bug-Debian: https://bugs.debian.org/881054 +Forwarded: https://lists.freedesktop.org/archives/beignet/2019-February/009228.html + +--- a/backend/src/backend/program.cpp ++++ b/backend/src/backend/program.cpp +@@ -985,6 +985,12 @@ EXTEND_QUOTE: + continue; // Don't push this str back; ignore it. + } + ++ if(str == "-g") { ++ // The OpenCL 2.0 standard requires accepting -g, ++ // but does not require that it actually does anything ++ continue; ++ } ++ + clOpt.push_back(str); + } + free(c_str); diff --git a/beignet-accept-old-create-queue.patch b/beignet-accept-old-create-queue.patch new file mode 100644 index 0000000..80f9258 --- /dev/null +++ b/beignet-accept-old-create-queue.patch @@ -0,0 +1,48 @@ +Description: Allow clCreateCommandQueue to create out-of-order queues + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009215.html + +--- a/src/cl_api_command_queue.c ++++ b/src/cl_api_command_queue.c +@@ -27,35 +27,11 @@ clCreateCommandQueue(cl_context context, + cl_command_queue_properties properties, + cl_int *errcode_ret) + { +- cl_command_queue queue = NULL; +- cl_int err = CL_SUCCESS; +- +- do { +- if (!CL_OBJECT_IS_CONTEXT(context)) { +- err = CL_INVALID_CONTEXT; +- break; +- } +- +- err = cl_devices_list_include_check(context->device_num, context->devices, 1, &device); +- if (err) +- break; +- +- if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)) { +- err = CL_INVALID_VALUE; +- break; +- } +- +- if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /*not supported now.*/ +- err = CL_INVALID_QUEUE_PROPERTIES; +- break; +- } +- +- queue = cl_create_command_queue(context, device, properties, 0, &err); +- } while (0); +- +- if (errcode_ret) +- *errcode_ret = err; +- return queue; ++ cl_queue_properties props[3]; ++ props[0] = CL_QUEUE_PROPERTIES; ++ props[1] = properties; ++ props[2] = 0; ++ return clCreateCommandQueueWithProperties(context, device, props, errcode_ret); + } + + /* 2.0 new API for create command queue. */ diff --git a/beignet-cl_accelerator_intel.patch b/beignet-cl_accelerator_intel.patch new file mode 100644 index 0000000..c60dcaa --- /dev/null +++ b/beignet-cl_accelerator_intel.patch @@ -0,0 +1,147 @@ +Description: Move cl_intel_accelerator to cl_intel.h + +Beignet's own headers define cl_intel_accelerator etc directly in +cl_ext.h, but khronos-opencl-headers 2.2~ places these in a separate +file cl_ext_intel.h, and older versions do not include it at all. + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009217.html + +--- a/include/CL/cl_intel.h ++++ b/include/CL/cl_intel.h +@@ -26,6 +26,115 @@ + extern "C" { + #endif + ++#if defined(__CL_EXT_H) && !defined(cl_intel_accelerator) ++#ifdef CL_VERSION_2_2 ++#include "CL/cl_ext_intel.h" ++#else ++/********************************* ++* cl_intel_accelerator extension * ++*********************************/ ++#define cl_intel_accelerator 1 ++#define cl_intel_motion_estimation 1 ++ ++typedef struct _cl_accelerator_intel* cl_accelerator_intel; ++typedef cl_uint cl_accelerator_type_intel; ++typedef cl_uint cl_accelerator_info_intel; ++ ++typedef struct _cl_motion_estimation_desc_intel { ++ cl_uint mb_block_type; ++ cl_uint subpixel_mode; ++ cl_uint sad_adjust_mode; ++ cl_uint search_path_type; ++} cl_motion_estimation_desc_intel; ++ ++/* Error Codes */ ++#define CL_INVALID_ACCELERATOR_INTEL -1094 ++#define CL_INVALID_ACCELERATOR_TYPE_INTEL -1095 ++#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL -1096 ++#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL -1097 ++ ++/* Deprecated Error Codes */ ++#define CL_INVALID_ACCELERATOR_INTEL_DEPRECATED -6000 ++#define CL_INVALID_ACCELERATOR_TYPE_INTEL_DEPRECATED -6001 ++#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL_DEPRECATED -6002 ++#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL_DEPRECATED -6003 ++ ++/* cl_accelerator_type_intel */ ++#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0 ++ ++/* cl_accelerator_info_intel */ ++#define CL_ACCELERATOR_DESCRIPTOR_INTEL 0x4090 ++#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL 0x4091 ++#define CL_ACCELERATOR_CONTEXT_INTEL 0x4092 ++#define CL_ACCELERATOR_TYPE_INTEL 0x4093 ++ ++/*cl_motion_detect_desc_intel flags */ ++#define CL_ME_MB_TYPE_16x16_INTEL 0x0 ++#define CL_ME_MB_TYPE_8x8_INTEL 0x1 ++#define CL_ME_MB_TYPE_4x4_INTEL 0x2 ++ ++#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 ++#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 ++#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL 0x2 ++ ++#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 ++#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x1 ++ ++#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL 0x0 ++#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1 ++#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5 ++ ++extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL ++clCreateAcceleratorINTEL( ++ cl_context /* context */, ++ cl_accelerator_type_intel /* accelerator_type */, ++ size_t /* descriptor_size */, ++ const void* /* descriptor */, ++ cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++typedef CL_API_ENTRY cl_accelerator_intel ++ (CL_API_CALL *clCreateAcceleratorINTEL_fn)( ++ cl_context /* context */, ++ cl_accelerator_type_intel /* accelerator_type */, ++ size_t /* descriptor_size */, ++ const void* /* descriptor */, ++ cl_int* /* errcode_ret */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++extern CL_API_ENTRY cl_int CL_API_CALL ++clGetAcceleratorInfoINTEL ++( ++ cl_accelerator_intel /* accelerator */, ++ cl_accelerator_info_intel /* param_name */, ++ size_t /* param_value_size */, ++ void* /* param_value */, ++ size_t* /* param_value_size_ret */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++typedef CL_API_ENTRY cl_int ++ (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)( ++ cl_accelerator_intel /* accelerator */, ++ cl_accelerator_info_intel /* param_name */, ++ size_t /* param_value_size */, ++ void* /* param_value */, ++ size_t* /* param_value_size_ret */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++extern CL_API_ENTRY cl_int CL_API_CALL ++clRetainAcceleratorINTEL( ++ cl_accelerator_intel /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++typedef CL_API_ENTRY cl_int ++ (CL_API_CALL *clRetainAcceleratorINTEL_fn)( ++ cl_accelerator_intel /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++extern CL_API_ENTRY cl_int CL_API_CALL ++clReleaseAcceleratorINTEL( ++ cl_accelerator_intel /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2; ++ ++typedef CL_API_ENTRY cl_int ++ (CL_API_CALL *clReleaseAcceleratorINTEL_fn)( ++ cl_accelerator_intel /* accelerator */ ) CL_EXT_SUFFIX__VERSION_1_2; ++#endif ++#endif ++ + #define CL_MEM_PINNABLE (1 << 10) + + /* Track allocations and report current number of unfreed allocations */ +--- a/src/cl_accelerator_intel.h ++++ b/src/cl_accelerator_intel.h +@@ -4,6 +4,7 @@ + #include "cl_base_object.h" + #include "CL/cl.h" + #include "CL/cl_ext.h" ++#include "CL/cl_intel.h" + #include + + struct _cl_accelerator_intel { +--- a/src/cl_driver.h ++++ b/src/cl_driver.h +@@ -24,6 +24,7 @@ + #include + #include "cl_driver_type.h" + #include "CL/cl_ext.h" ++#include "CL/cl_intel.h" + /* Various limitations we should remove actually */ + #define GEN_MAX_SURFACES 256 + #define GEN_MAX_SAMPLERS 16 diff --git a/beignet-clearer-type-errors.patch b/beignet-clearer-type-errors.patch new file mode 100644 index 0000000..09fc534 --- /dev/null +++ b/beignet-clearer-type-errors.patch @@ -0,0 +1,118 @@ +Description: More user-friendly "type not supported" errors + +(It would be even better if these returned build failure +rather than asserting, but that's not as easy) + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2017-September/009169.html + +--- a/backend/src/backend/gen_insn_selection.cpp ++++ b/backend/src/backend/gen_insn_selection.cpp +@@ -5253,7 +5253,7 @@ extern bool OCL_DEBUGINFO; // first defi + write64Stateless(sel, address, src); + sel.pop(); + } else { +- GBE_ASSERT(sel.hasLongType()); ++ GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device"); + write64Stateless(sel, address, src); + } + } +@@ -5838,7 +5838,7 @@ extern bool OCL_DEBUGINFO; // first defi + + /* The special case, when dst is half, float->word->half will lose accuracy. */ + if (dstType == TYPE_HALF) { +- GBE_ASSERT(sel.hasHalfType()); ++ GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device"); + type = GEN_TYPE_HF; + } + +@@ -5879,7 +5879,7 @@ extern bool OCL_DEBUGINFO; // first defi + + if (dstType == TYPE_HALF) { + /* There is no MOV for Long <---> Half. So Long-->Float-->half. */ +- GBE_ASSERT(sel.hasLongType()); ++ GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device"); + GBE_ASSERT(sel.hasHalfType()); + sel.push(); + if (sel.isScalarReg(insn.getSrc(0))) { +@@ -6181,7 +6181,7 @@ extern bool OCL_DEBUGINFO; // first defi + } + } else if (srcType == ir::TYPE_HALF) { + /* No need to consider old platform. if we support half, we must have native long. */ +- GBE_ASSERT(sel.hasLongType()); ++ GBE_ASSERTM(sel.hasLongType(), "Long (int64) not supported on this device"); + GBE_ASSERT(sel.hasHalfType()); + uint32_t type = dstType == TYPE_U64 ? GEN_TYPE_UD : GEN_TYPE_D; + GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0))), TYPE_U32), type); +@@ -6205,7 +6205,7 @@ extern bool OCL_DEBUGINFO; // first defi + sel.MOV(dst, tmp); + } + } else if (src.type == GEN_TYPE_DF) { +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device"); + GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long. + + // Just Mov +@@ -6224,7 +6224,7 @@ extern bool OCL_DEBUGINFO; // first defi + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')"); + + if (sel.isScalarReg(insn.getDst(0))) { + // dst is scalar, just MOV and nothing more. +@@ -6263,7 +6263,7 @@ extern bool OCL_DEBUGINFO; // first defi + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')"); + GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half. + + if (sel.isScalarReg(insn.getDst(0))) { // uniform case. +@@ -6329,7 +6329,7 @@ extern bool OCL_DEBUGINFO; // first defi + // Special case, half -> char/short. + /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and + strided by a DWord on the destination. */ +- GBE_ASSERT(sel.hasHalfType()); ++ GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device"); + GenRegister tmp; + sel.push(); + if (sel.isScalarReg(insn.getSrc(0))) { +@@ -6361,7 +6361,7 @@ extern bool OCL_DEBUGINFO; // first defi + // Special case, char/uchar -> half + /* [DevBDW+]: Format conversion to or from HF (Half Float) must be DWord-aligned and + strided by a DWord on the destination. */ +- GBE_ASSERT(sel.hasHalfType()); ++ GBE_ASSERTM(sel.hasHalfType(), "Half precision not supported on this device"); + GenRegister tmp = GenRegister::retype(sel.unpacked_uw(sel.reg(FAMILY_DWORD, sel.isScalarReg(insn.getSrc(0)))), GEN_TYPE_HF); + sel.push(); + if (sel.isScalarReg(insn.getSrc(0))) { +@@ -6383,7 +6383,7 @@ extern bool OCL_DEBUGINFO; // first defi + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + const RegisterFamily dstFamily = getFamily(dstType); + +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device (if this is a literal, use '1.0f' not '1.0')"); + GBE_ASSERT(sel.hasHalfType()); //So far, if we support double, we support half. + if (sel.isScalarReg(insn.getDst(0))) { + // dst is scalar, just MOV and nothing more. +@@ -6427,7 +6427,7 @@ extern bool OCL_DEBUGINFO; // first defi + const GenRegister dst = sel.selReg(insn.getDst(0), dstType); + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device"); + GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long. + // Just Mov + sel.MOV(dst, src); +@@ -6442,7 +6442,7 @@ extern bool OCL_DEBUGINFO; // first defi + const GenRegister src = sel.selReg(insn.getSrc(0), srcType); + const RegisterFamily srcFamily = getFamily(srcType); + +- GBE_ASSERT(sel.hasDoubleType()); ++ GBE_ASSERTM(sel.hasDoubleType(), "Double precision not supported on this device"); + GBE_ASSERT(sel.hasLongType()); //So far, if we support double, we support native long. + + if (sel.hasLongType() && sel.hasLongRegRestrict()) { diff --git a/beignet-coffeelake.patch b/beignet-coffeelake.patch new file mode 100644 index 0000000..2b609b7 --- /dev/null +++ b/beignet-coffeelake.patch @@ -0,0 +1,228 @@ +Description: Enable Coffee Lake support + +Little change is needed here because the graphics core is the same as +Kaby Lake. Includes all PCI IDs currently supported by the kernel driver +in the drm-intel tree (Coffee Lake S, H and U devices in GT 1, 2 and 3 +configurations). + +Origin: upstream 7e181af2ea4d37f67406f2563c0e13fa1fdbb14b +Author: Mark Thompson + +--- a/backend/src/backend/gen_program.cpp ++++ b/backend/src/backend/gen_program.cpp +@@ -209,6 +209,8 @@ namespace gbe { + ctx = GBE_NEW(BxtContext, unit, name, deviceID, relaxMath); + } else if (IS_KABYLAKE(deviceID)) { + ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); ++ } else if (IS_COFFEELAKE(deviceID)) { ++ ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); + } else if (IS_GEMINILAKE(deviceID)) { + ctx = GBE_NEW(GlkContext, unit, name, deviceID, relaxMath); + } +@@ -328,6 +330,7 @@ namespace gbe { + (IS_SKYLAKE(deviceID) && MATCH_SKL_HEADER(binary)) || \ + (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) || \ + (IS_KABYLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ ++ (IS_COFFEELAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ + (IS_GEMINILAKE(deviceID) && MATCH_GLK_HEADER(binary)) \ + ) + +@@ -436,6 +439,8 @@ namespace gbe { + FILL_BXT_HEADER(*binary); + }else if(IS_KABYLAKE(prog->deviceID)){ + FILL_KBL_HEADER(*binary); ++ }else if(IS_COFFEELAKE(prog->deviceID)){ ++ FILL_KBL_HEADER(*binary); + }else if(IS_GEMINILAKE(prog->deviceID)){ + FILL_GLK_HEADER(*binary); + }else { +--- a/src/cl_device_data.h ++++ b/src/cl_device_data.h +@@ -372,7 +372,59 @@ + (devid == PCI_CHIP_GLK_3x6 || \ + devid == PCI_CHIP_GLK_2x6) + +-#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid)) ++#define PCI_CHIP_COFFEELAKE_S_GT1_1 0x3E90 ++#define PCI_CHIP_COFFEELAKE_S_GT1_2 0x3E93 ++#define PCI_CHIP_COFFEELAKE_S_GT1_3 0x3E99 ++ ++#define PCI_CHIP_COFFEELAKE_U_GT1_1 0x3EA1 ++#define PCI_CHIP_COFFEELAKE_U_GT1_2 0x3EA4 ++ ++#define PCI_CHIP_COFFEELAKE_S_GT2_1 0x3E91 ++#define PCI_CHIP_COFFEELAKE_S_GT2_2 0x3E92 ++#define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96 ++#define PCI_CHIP_COFFEELAKE_S_GT2_4 0x3E9A ++ ++#define PCI_CHIP_COFFEELAKE_H_GT2_1 0x3E94 ++#define PCI_CHIP_COFFEELAKE_H_GT2_2 0x3E9B ++ ++#define PCI_CHIP_COFFEELAKE_U_GT2_1 0x3EA0 ++#define PCI_CHIP_COFFEELAKE_U_GT2_2 0x3EA3 ++#define PCI_CHIP_COFFEELAKE_U_GT2_3 0x3EA9 ++ ++#define PCI_CHIP_COFFEELAKE_U_GT3_1 0x3EA2 ++#define PCI_CHIP_COFFEELAKE_U_GT3_2 0x3EA5 ++#define PCI_CHIP_COFFEELAKE_U_GT3_3 0x3EA6 ++#define PCI_CHIP_COFFEELAKE_U_GT3_4 0x3EA7 ++#define PCI_CHIP_COFFEELAKE_U_GT3_5 0x3EA8 ++ ++#define IS_CFL_GT1(devid) \ ++ (devid == PCI_CHIP_COFFEELAKE_S_GT1_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT1_2 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT1_3 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT1_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT1_2) ++ ++#define IS_CFL_GT2(devid) \ ++ (devid == PCI_CHIP_COFFEELAKE_S_GT2_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT2_2 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT2_3 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT2_4 || \ ++ devid == PCI_CHIP_COFFEELAKE_H_GT2_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_H_GT2_2 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT2_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT2_2 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT2_3) ++ ++#define IS_CFL_GT3(devid) \ ++ (devid == PCI_CHIP_COFFEELAKE_U_GT3_1 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT3_2 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT3_3 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT3_4 || \ ++ devid == PCI_CHIP_COFFEELAKE_U_GT3_5) ++ ++#define IS_COFFEELAKE(devid) (IS_CFL_GT1(devid) || IS_CFL_GT2(devid) || IS_CFL_GT3(devid)) ++ ++#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid)) + + #define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120) + +--- a/src/cl_device_id.c ++++ b/src/cl_device_id.c +@@ -274,6 +274,36 @@ static struct _cl_device_id intel_glk12eu_device = { + #include "cl_gen9_device.h" + }; + ++static struct _cl_device_id intel_cfl_gt1_device = { ++ .max_compute_unit = 12, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 2, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ ++static struct _cl_device_id intel_cfl_gt2_device = { ++ .max_compute_unit = 24, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 3, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ ++static struct _cl_device_id intel_cfl_gt3_device = { ++ .max_compute_unit = 48, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 6, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ + LOCAL cl_device_id + cl_get_gt_device(cl_device_type device_type) + { +@@ -785,6 +815,64 @@ glk12eu_break: + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + ++ case PCI_CHIP_COFFEELAKE_S_GT1_1: ++ case PCI_CHIP_COFFEELAKE_S_GT1_2: ++ case PCI_CHIP_COFFEELAKE_S_GT1_3: ++ DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT1"); ++ case PCI_CHIP_COFFEELAKE_U_GT1_1: ++ case PCI_CHIP_COFFEELAKE_U_GT1_2: ++ DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT1"); ++cfl_gt1_break: ++ intel_cfl_gt1_device.device_id = device_id; ++ intel_cfl_gt1_device.platform = cl_get_platform_default(); ++ ret = &intel_cfl_gt1_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ ++ case PCI_CHIP_COFFEELAKE_S_GT2_1: ++ case PCI_CHIP_COFFEELAKE_S_GT2_2: ++ case PCI_CHIP_COFFEELAKE_S_GT2_3: ++ case PCI_CHIP_COFFEELAKE_S_GT2_4: ++ DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT2"); ++ case PCI_CHIP_COFFEELAKE_H_GT2_1: ++ case PCI_CHIP_COFFEELAKE_H_GT2_2: ++ DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Halo GT2"); ++ case PCI_CHIP_COFFEELAKE_U_GT2_1: ++ case PCI_CHIP_COFFEELAKE_U_GT2_2: ++ case PCI_CHIP_COFFEELAKE_U_GT2_3: ++ DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT2"); ++cfl_gt2_break: ++ intel_cfl_gt2_device.device_id = device_id; ++ intel_cfl_gt2_device.platform = cl_get_platform_default(); ++ ret = &intel_cfl_gt2_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ ++ case PCI_CHIP_COFFEELAKE_U_GT3_1: ++ case PCI_CHIP_COFFEELAKE_U_GT3_2: ++ case PCI_CHIP_COFFEELAKE_U_GT3_3: ++ case PCI_CHIP_COFFEELAKE_U_GT3_4: ++ case PCI_CHIP_COFFEELAKE_U_GT3_5: ++ DECL_INFO_STRING(cfl_gt3_break, intel_cfl_gt3_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT3"); ++cfl_gt3_break: ++ intel_cfl_gt3_device.device_id = device_id; ++ intel_cfl_gt3_device.platform = cl_get_platform_default(); ++ ret = &intel_cfl_gt3_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ + case PCI_CHIP_SANDYBRIDGE_BRIDGE: + case PCI_CHIP_SANDYBRIDGE_GT1: + case PCI_CHIP_SANDYBRIDGE_GT2: +@@ -992,7 +1080,10 @@ LOCAL cl_bool is_gen_device(cl_device_id device) { + device == &intel_kbl_gt3_device || + device == &intel_kbl_gt4_device || + device == &intel_glk18eu_device || +- device == &intel_glk12eu_device; ++ device == &intel_glk12eu_device || ++ device == &intel_cfl_gt1_device || ++ device == &intel_cfl_gt2_device || ++ device == &intel_cfl_gt3_device; + } + + LOCAL cl_int +@@ -1420,7 +1511,9 @@ cl_device_get_version(cl_device_id device, cl_int *ver) + || device == &intel_bxt18eu_device || device == &intel_bxt12eu_device || device == &intel_kbl_gt1_device + || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device + || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device +- || device == &intel_glk18eu_device || device == &intel_glk12eu_device) { ++ || device == &intel_glk18eu_device || device == &intel_glk12eu_device ++ || device == &intel_cfl_gt1_device || device == &intel_cfl_gt1_device ++ || device == &intel_cfl_gt3_device) { + *ver = 9; + } else + return CL_INVALID_VALUE; diff --git a/beignet-cometlake.patch b/beignet-cometlake.patch new file mode 100644 index 0000000..0d0817e --- /dev/null +++ b/beignet-cometlake.patch @@ -0,0 +1,289 @@ +Description: Add Comet Lake/Amber Lake/more Coffee Lake support + +Minimally tested, I suggest using intel-opencl-icd instead if possible + +Author: Ridley Combs +Bug-Ubuntu: https://launchpad.net/bugs/1905340 +Origin: https://github.com/intel/beignet/pull/20/files + +--- a/backend/src/backend/gen_program.cpp ++++ b/backend/src/backend/gen_program.cpp +@@ -211,6 +211,10 @@ namespace gbe { + ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); + } else if (IS_COFFEELAKE(deviceID)) { + ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); ++ } else if (IS_COMETLAKE(deviceID)) { ++ ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); ++ } else if (IS_AMBERLAKE(deviceID)) { ++ ctx = GBE_NEW(KblContext, unit, name, deviceID, relaxMath); + } else if (IS_GEMINILAKE(deviceID)) { + ctx = GBE_NEW(GlkContext, unit, name, deviceID, relaxMath); + } +@@ -331,6 +335,8 @@ namespace gbe { + (IS_BROXTON(deviceID) && MATCH_BXT_HEADER(binary)) || \ + (IS_KABYLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ + (IS_COFFEELAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ ++ (IS_COMETLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ ++ (IS_AMBERLAKE(deviceID) && MATCH_KBL_HEADER(binary)) || \ + (IS_GEMINILAKE(deviceID) && MATCH_GLK_HEADER(binary)) \ + ) + +@@ -441,6 +447,10 @@ namespace gbe { + FILL_KBL_HEADER(*binary); + }else if(IS_COFFEELAKE(prog->deviceID)){ + FILL_KBL_HEADER(*binary); ++ }else if(IS_COMETLAKE(prog->deviceID)){ ++ FILL_KBL_HEADER(*binary); ++ }else if(IS_AMBERLAKE(prog->deviceID)){ ++ FILL_KBL_HEADER(*binary); + }else if(IS_GEMINILAKE(prog->deviceID)){ + FILL_GLK_HEADER(*binary); + }else { +--- a/src/cl_device_data.h ++++ b/src/cl_device_data.h +@@ -376,6 +376,8 @@ + #define PCI_CHIP_COFFEELAKE_S_GT1_2 0x3E93 + #define PCI_CHIP_COFFEELAKE_S_GT1_3 0x3E99 + ++#define PCI_CHIP_COFFEELAKE_H_GT1_1 0x3E9C ++ + #define PCI_CHIP_COFFEELAKE_U_GT1_1 0x3EA1 + #define PCI_CHIP_COFFEELAKE_U_GT1_2 0x3EA4 + +@@ -383,6 +385,7 @@ + #define PCI_CHIP_COFFEELAKE_S_GT2_2 0x3E92 + #define PCI_CHIP_COFFEELAKE_S_GT2_3 0x3E96 + #define PCI_CHIP_COFFEELAKE_S_GT2_4 0x3E9A ++#define PCI_CHIP_COFFEELAKE_S_GT2_5 0x3E98 + + #define PCI_CHIP_COFFEELAKE_H_GT2_1 0x3E94 + #define PCI_CHIP_COFFEELAKE_H_GT2_2 0x3E9B +@@ -401,6 +404,7 @@ + (devid == PCI_CHIP_COFFEELAKE_S_GT1_1 || \ + devid == PCI_CHIP_COFFEELAKE_S_GT1_2 || \ + devid == PCI_CHIP_COFFEELAKE_S_GT1_3 || \ ++ devid == PCI_CHIP_COFFEELAKE_H_GT1_1 || \ + devid == PCI_CHIP_COFFEELAKE_U_GT1_1 || \ + devid == PCI_CHIP_COFFEELAKE_U_GT1_2) + +@@ -409,6 +413,7 @@ + devid == PCI_CHIP_COFFEELAKE_S_GT2_2 || \ + devid == PCI_CHIP_COFFEELAKE_S_GT2_3 || \ + devid == PCI_CHIP_COFFEELAKE_S_GT2_4 || \ ++ devid == PCI_CHIP_COFFEELAKE_S_GT2_5 || \ + devid == PCI_CHIP_COFFEELAKE_H_GT2_1 || \ + devid == PCI_CHIP_COFFEELAKE_H_GT2_2 || \ + devid == PCI_CHIP_COFFEELAKE_U_GT2_1 || \ +@@ -424,7 +429,65 @@ + + #define IS_COFFEELAKE(devid) (IS_CFL_GT1(devid) || IS_CFL_GT2(devid) || IS_CFL_GT3(devid)) + +-#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid)) ++#define PCI_CHIP_COMETLAKE_S_GT1_1 0x9BA5 ++#define PCI_CHIP_COMETLAKE_S_GT1_2 0x9BA8 ++ ++#define PCI_CHIP_COMETLAKE_H_GT1_1 0x9BA4 ++#define PCI_CHIP_COMETLAKE_H_GT1_2 0x9BA2 ++ ++#define PCI_CHIP_COMETLAKE_U_GT1_1 0x9B21 ++#define PCI_CHIP_COMETLAKE_U_GT1_2 0x9BAA ++#define PCI_CHIP_COMETLAKE_U_GT1_3 0x9BAC ++ ++#define PCI_CHIP_COMETLAKE_S_GT2_1 0x9BC5 ++#define PCI_CHIP_COMETLAKE_S_GT2_2 0x9BC8 ++ ++#define PCI_CHIP_COMETLAKE_H_GT2_1 0x9BC4 ++#define PCI_CHIP_COMETLAKE_H_GT2_2 0x9BC2 ++ ++#define PCI_CHIP_COMETLAKE_W_GT2_1 0x9BC6 ++#define PCI_CHIP_COMETLAKE_W_GT2_2 0x9BE6 ++#define PCI_CHIP_COMETLAKE_W_GT2_3 0x9BF6 ++ ++#define PCI_CHIP_COMETLAKE_U_GT2_1 0x9B41 ++#define PCI_CHIP_COMETLAKE_U_GT2_2 0x9BCA ++#define PCI_CHIP_COMETLAKE_U_GT2_3 0x9BCC ++ ++#define IS_CML_GT1(devid) \ ++ (devid == PCI_CHIP_COMETLAKE_S_GT1_1 || \ ++ devid == PCI_CHIP_COMETLAKE_S_GT1_2 || \ ++ devid == PCI_CHIP_COMETLAKE_H_GT1_1 || \ ++ devid == PCI_CHIP_COMETLAKE_H_GT1_2 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT1_1 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT1_2 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT1_3) ++ ++#define IS_CML_GT2(devid) \ ++ (devid == PCI_CHIP_COMETLAKE_S_GT2_1 || \ ++ devid == PCI_CHIP_COMETLAKE_S_GT2_2 || \ ++ devid == PCI_CHIP_COMETLAKE_H_GT2_1 || \ ++ devid == PCI_CHIP_COMETLAKE_H_GT2_2 || \ ++ devid == PCI_CHIP_COMETLAKE_W_GT2_1 || \ ++ devid == PCI_CHIP_COMETLAKE_W_GT2_2 || \ ++ devid == PCI_CHIP_COMETLAKE_W_GT2_3 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT2_1 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT2_2 || \ ++ devid == PCI_CHIP_COMETLAKE_U_GT2_3) ++ ++#define IS_COMETLAKE(devid) (IS_CML_GT1(devid) || IS_CML_GT2(devid)) ++ ++#define PCI_CHIP_AMBERLAKE_Y_GT2_1 0x591C ++#define PCI_CHIP_AMBERLAKE_Y_GT2_2 0x87C0 ++#define PCI_CHIP_AMBERLAKE_Y_GT2_3 0x87CA ++ ++#define IS_AML_GT2(devid) \ ++ (devid == PCI_CHIP_AMBERLAKE_Y_GT2_1 || \ ++ devid == PCI_CHIP_AMBERLAKE_Y_GT2_2 || \ ++ devid == PCI_CHIP_AMBERLAKE_Y_GT2_3) ++ ++#define IS_AMBERLAKE(devid) (IS_AML_GT2(devid)) ++ ++#define IS_GEN9(devid) (IS_SKYLAKE(devid) || IS_BROXTON(devid) || IS_KABYLAKE(devid) || IS_GEMINILAKE(devid) || IS_COFFEELAKE(devid) || IS_COMETLAKE(devid) || IS_AMBERLAKE(devid)) + + #define MAX_OCLVERSION(devid) (IS_GEN9(devid) ? 200 : 120) + +--- a/src/cl_device_id.c ++++ b/src/cl_device_id.c +@@ -304,6 +304,36 @@ static struct _cl_device_id intel_cfl_gt3_device = { + #include "cl_gen9_device.h" + }; + ++static struct _cl_device_id intel_cml_gt1_device = { ++ .max_compute_unit = 12, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 2, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ ++static struct _cl_device_id intel_cml_gt2_device = { ++ .max_compute_unit = 24, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 3, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ ++static struct _cl_device_id intel_aml_gt2_device = { ++ .max_compute_unit = 24, ++ .max_thread_per_unit = 7, ++ .sub_slice_count = 3, ++ .max_work_item_sizes = {512, 512, 512}, ++ .max_work_group_size = 256, ++ .max_clock_frequency = 1000, ++#include "cl_gen9_device.h" ++}; ++ + LOCAL cl_device_id + cl_get_gt_device(cl_device_type device_type) + { +@@ -819,6 +849,8 @@ cl_get_gt_device(cl_device_type device_type) + case PCI_CHIP_COFFEELAKE_S_GT1_2: + case PCI_CHIP_COFFEELAKE_S_GT1_3: + DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT1"); ++ case PCI_CHIP_COFFEELAKE_H_GT1_1: ++ DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Halo GT1"); + case PCI_CHIP_COFFEELAKE_U_GT1_1: + case PCI_CHIP_COFFEELAKE_U_GT1_2: + DECL_INFO_STRING(cfl_gt1_break, intel_cfl_gt1_device, name, "Intel(R) UHD Graphics Coffee Lake Mobile GT1"); +@@ -837,6 +869,7 @@ cl_get_gt_device(cl_device_type device_type) + case PCI_CHIP_COFFEELAKE_S_GT2_2: + case PCI_CHIP_COFFEELAKE_S_GT2_3: + case PCI_CHIP_COFFEELAKE_S_GT2_4: ++ case PCI_CHIP_COFFEELAKE_S_GT2_5: + DECL_INFO_STRING(cfl_gt2_break, intel_cfl_gt2_device, name, "Intel(R) UHD Graphics Coffee Lake Desktop GT2"); + case PCI_CHIP_COFFEELAKE_H_GT2_1: + case PCI_CHIP_COFFEELAKE_H_GT2_2: +@@ -873,6 +906,67 @@ cl_get_gt_device(cl_device_type device_type) + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); + break; + ++ case PCI_CHIP_COMETLAKE_S_GT1_1: ++ case PCI_CHIP_COMETLAKE_S_GT1_2: ++ DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Desktop GT1"); ++ case PCI_CHIP_COMETLAKE_H_GT1_1: ++ case PCI_CHIP_COMETLAKE_H_GT1_2: ++ DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Halo GT1"); ++ case PCI_CHIP_COMETLAKE_U_GT1_1: ++ case PCI_CHIP_COMETLAKE_U_GT1_2: ++ case PCI_CHIP_COMETLAKE_U_GT1_3: ++ DECL_INFO_STRING(cml_gt1_break, intel_cml_gt1_device, name, "Intel(R) UHD Graphics Comet Lake Mobile GT1"); ++cml_gt1_break: ++ intel_cml_gt1_device.device_id = device_id; ++ intel_cml_gt1_device.platform = cl_get_platform_default(); ++ ret = &intel_cml_gt1_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ ++ case PCI_CHIP_COMETLAKE_S_GT2_1: ++ case PCI_CHIP_COMETLAKE_S_GT2_2: ++ DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Desktop GT2"); ++ case PCI_CHIP_COMETLAKE_H_GT2_1: ++ case PCI_CHIP_COMETLAKE_H_GT2_2: ++ DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Halo GT2"); ++ case PCI_CHIP_COMETLAKE_W_GT2_1: ++ case PCI_CHIP_COMETLAKE_W_GT2_2: ++ case PCI_CHIP_COMETLAKE_W_GT2_3: ++ DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Workstation GT2"); ++ case PCI_CHIP_COMETLAKE_U_GT2_1: ++ case PCI_CHIP_COMETLAKE_U_GT2_2: ++ case PCI_CHIP_COMETLAKE_U_GT2_3: ++ DECL_INFO_STRING(cml_gt2_break, intel_cml_gt2_device, name, "Intel(R) UHD Graphics Comet Lake Mobile GT2"); ++cml_gt2_break: ++ intel_cml_gt2_device.device_id = device_id; ++ intel_cml_gt2_device.platform = cl_get_platform_default(); ++ ret = &intel_cml_gt2_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ ++ case PCI_CHIP_AMBERLAKE_Y_GT2_1: ++ case PCI_CHIP_AMBERLAKE_Y_GT2_2: ++ case PCI_CHIP_AMBERLAKE_Y_GT2_3: ++ DECL_INFO_STRING(aml_gt2_break, intel_aml_gt2_device, name, "Intel(R) UHD Graphics Amber Lake ULX GT2"); ++aml_gt2_break: ++ intel_aml_gt2_device.device_id = device_id; ++ intel_aml_gt2_device.platform = cl_get_platform_default(); ++ ret = &intel_aml_gt2_device; ++ cl_intel_platform_get_default_extension(ret); ++#ifdef ENABLE_FP64 ++ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id); ++#endif ++ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id); ++ break; ++ + case PCI_CHIP_SANDYBRIDGE_BRIDGE: + case PCI_CHIP_SANDYBRIDGE_GT1: + case PCI_CHIP_SANDYBRIDGE_GT2: +@@ -1083,7 +1177,10 @@ LOCAL cl_bool is_gen_device(cl_device_id device) { + device == &intel_glk12eu_device || + device == &intel_cfl_gt1_device || + device == &intel_cfl_gt2_device || +- device == &intel_cfl_gt3_device; ++ device == &intel_cfl_gt3_device || ++ device == &intel_cml_gt1_device || ++ device == &intel_cml_gt2_device || ++ device == &intel_aml_gt2_device; + } + + LOCAL cl_int +@@ -1513,7 +1610,8 @@ cl_device_get_version(cl_device_id device, cl_int *ver) + || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device + || device == &intel_glk18eu_device || device == &intel_glk12eu_device + || device == &intel_cfl_gt1_device || device == &intel_cfl_gt1_device +- || device == &intel_cfl_gt3_device) { ++ || device == &intel_cfl_gt3_device || device == &intel_cml_gt1_device ++ || device == &intel_cml_gt2_device || device == &intel_aml_gt2_device) { + *ver = 9; + } else + return CL_INVALID_VALUE; diff --git a/beignet-debian-885423.patch b/beignet-debian-885423.patch new file mode 100644 index 0000000..ac10efa --- /dev/null +++ b/beignet-debian-885423.patch @@ -0,0 +1,83 @@ +Description: Fix self-test fail in some conditions + +Reverts upstream 81755054c4c19d821e58456a1a7d601806e60e92 + +Known triggers are Darktable and the GEGL test suite + +Note that this does *not* fix the other Darktable issue +(https://gitlab.freedesktop.org/beignet/beignet/issues/60): beignet still +needs to be blacklisted in Darktable (which it is by default). + +Author: Rebecca N. Palmer +Bug: https://gitlab.freedesktop.org/beignet/beignet/issues/7 +Bug-Debian: https://bugs.debian.org/885423 +Forwarded: https://lists.freedesktop.org/archives/beignet/2019-January/009227.html + +diff --git b/backend/src/backend/gen_insn_selection_optimize.cpp a/backend/src/backend/gen_insn_selection_optimize.cpp +index 07547ec4..d2e0fb9b 100644 +--- b/backend/src/backend/gen_insn_selection_optimize.cpp ++++ a/backend/src/backend/gen_insn_selection_optimize.cpp +@@ -74,7 +74,8 @@ namespace gbe + const GenRegister& replacement) : + insn(insn), intermedia(intermedia), replacement(replacement) + { +- assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD); ++ assert(insn.opcode == SEL_OP_MOV); ++ assert(&(insn.src(0)) == &replacement); + assert(&(insn.dst(0)) == &intermedia); + this->elements = CalculateElements(intermedia, insn.state.execWidth); + replacementOverwritten = false; +@@ -101,7 +102,6 @@ namespace gbe + void doReplacement(ReplaceInfo* info); + bool CanBeReplaced(const ReplaceInfo* info, const SelectionInstruction& insn, const GenRegister& var); + void cleanReplaceInfoMap(); +- void doNegAddOptimization(SelectionInstruction &insn); + + SelectionBlock &bb; + const ir::Liveness::LiveOut& liveout; +@@ -159,13 +159,8 @@ namespace gbe + + void SelBasicBlockOptimizer::addToReplaceInfoMap(SelectionInstruction& insn) + { +- assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD); +- GenRegister &src = insn.src(0); +- if (insn.opcode == SEL_OP_ADD) { +- if (src.file == GEN_IMMEDIATE_VALUE) +- src = insn.src(1); +- } +- ++ assert(insn.opcode == SEL_OP_MOV); ++ const GenRegister& src = insn.src(0); + const GenRegister& dst = insn.dst(0); + if (src.type != dst.type || src.file != dst.file) + return; +@@ -254,29 +249,10 @@ namespace gbe + + if (insn.opcode == SEL_OP_MOV) + addToReplaceInfoMap(insn); +- +- doNegAddOptimization(insn); + } + cleanReplaceInfoMap(); + } + +- /* LLVM transform Mad(a, -b, c) to +- Add b, -b, 0 +- Mad val, a, b, c +- for Gen support negtive modifier, mad(a, -b, c) is native suppoted. +- Also it can be used for the same like instruction sequence. +- Do it just like a: mov b, -b, so it is a Mov operation like LocalCopyPropagation +- */ +- void SelBasicBlockOptimizer::doNegAddOptimization(SelectionInstruction &insn) { +- if (insn.opcode == SEL_OP_ADD) { +- GenRegister src0 = insn.src(0); +- GenRegister src1 = insn.src(1); +- if ((src0.negation && src1.file == GEN_IMMEDIATE_VALUE && src1.value.f == 0.0f) || +- (src1.negation && src0.file == GEN_IMMEDIATE_VALUE && src0.value.f == 0.0f)) +- addToReplaceInfoMap(insn); +- } +- } +- + void SelBasicBlockOptimizer::run() + { + for (size_t i = 0; i < MaxTries; ++i) { diff --git a/beignet-disable-wayland-warning.patch b/beignet-disable-wayland-warning.patch new file mode 100644 index 0000000..46b6a60 --- /dev/null +++ b/beignet-disable-wayland-warning.patch @@ -0,0 +1,17 @@ +Description: Disable expected-under-Wayland warning + +Author: Rebecca N. Palmer +Bug-Debian: https://bugs.debian.org/882486 +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-January/009182.html + +--- beignet-1.3.2.orig/src/intel/intel_driver.c ++++ beignet-1.3.2/src/intel/intel_driver.c +@@ -235,8 +235,6 @@ if(intel->x11_display) { + intel_driver_init_shared(intel, intel->dri_ctx); + Xfree(driver_name); + } +- else +- fprintf(stderr, "X server found. dri2 connection failed! \n"); + } + #endif + diff --git a/beignet-docs-broken-links.patch b/beignet-docs-broken-links.patch new file mode 100644 index 0000000..940da1f --- /dev/null +++ b/beignet-docs-broken-links.patch @@ -0,0 +1,63 @@ +Description: Fix broken links and list syntax in documentation + +The upstream documentation is meant to be a part of a larger wiki, +and does not work properly when built as-is on its own + +Author: Rebecca N. Palmer + +--- a/docs/Beignet.mdwn ++++ b/docs/Beignet.mdwn +@@ -11,7 +11,7 @@ about the compiler, please refer to `bac + + News + ---- +-[[Beignet project news|Beignet/NEWS]] ++[[Beignet project news|NEWS]] + + Prerequisite + ------------ +@@ -139,6 +139,7 @@ need to refer the "Known Issues" section + + Normally, beignet needs to run under X server environment as normal user. If there isn't X server, + beignet provides two alternative to run: ++ + * Run as root without X. + * Enable the drm render nodes by passing drm.rnodes=1 to the kernel boot args, then you can run beignet with non-root and without X. + +@@ -288,15 +289,15 @@ Please specify your hardware when report + + Documents for OpenCL application developers + ------------------------------------------- +-- [[Cross compile (yocto)|Beignet/howto/cross-compiler-howto]] +-- [[Work with old system without c++11|Beignet/howto/oldgcc-howto]] +-- [[Kernel Optimization Guide|Beignet/optimization-guide]] +-- [[Libva Buffer Sharing|Beignet/howto/libva-buffer-sharing-howto]] +-- [[V4l2 Buffer Sharing|Beignet/howto/v4l2-buffer-sharing-howto]] ++- [[Cross compile (yocto)|howto/cross-compiler-howto]] ++- [[Work with old system without c++11|howto/oldgcc-howto]] ++- [[Kernel Optimization Guide|optimization-guide]] ++- [[Libva Buffer Sharing|howto/libva-buffer-sharing-howto]] ++- [[V4l2 Buffer Sharing|howto/v4l2-buffer-sharing-howto]] +-- [[OpenGL Buffer Sharing|Beignet/howto/gl-buffer-sharing-howto]] +-- [[Video Motion Estimation|Beignet/howto/video-motion-estimation-howto]] +-- [[Stand Alone Unit Test|Beignet/howto/stand-alone-utest-howto]] +-- [[Android build|Beignet/howto/android-build-howto]] ++- [[OpenGL Buffer Sharing|howto/gl-buffer-sharing-howto]] ++- [[Video Motion Estimation|howto/video-motion-estimation-howto]] ++- [[Stand Alone Unit Test|howto/stand-alone-utest-howto]] ++- [[Android build|howto/android-build-howto]] + + The wiki URL is as below: + [http://www.freedesktop.org/wiki/Software/Beignet/](http://www.freedesktop.org/wiki/Software/Beignet/) +--- a/docs/howto/video-motion-estimation-howto.mdwn ++++ b/docs/howto/video-motion-estimation-howto.mdwn +@@ -66,6 +66,6 @@ Please go through it for details. + More references + --------------- + +-https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt +-https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt +-https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl ++[https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt](https://www.khronos.org/registry/cl/extensions/intel/cl_intel_accelerator.txt) ++[https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt](https://www.khronos.org/registry/cl/extensions/intel/cl_intel_motion_estimation.txt) ++[https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl](https://software.intel.com/en-us/articles/intro-to-motion-estimation-extension-for-opencl) diff --git a/beignet-eventchain-memory-leak.patch b/beignet-eventchain-memory-leak.patch new file mode 100644 index 0000000..4b651da --- /dev/null +++ b/beignet-eventchain-memory-leak.patch @@ -0,0 +1,76 @@ +Description: Don't leak memory on long chains of events + +Delete event->depend_events when no longer needed to avoid keeping +the whole dependency tree in memory as long as the final event exists + +Author: Rebecca N. Palmer +Bug-Ubuntu: https://launchpad.net/bugs/1354086 +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009209.html + +--- a/src/cl_event.c ++++ b/src/cl_event.c +@@ -184,6 +184,25 @@ cl_event_new(cl_context ctx, cl_command_ + return e; + } + ++/* This exists to prevent long chains of events from filling up memory (https://bugs.launchpad.net/ubuntu/+source/beignet/+bug/1354086). Call only after the dependencies are complete, or failed and marked as such in this event's status, or when this event is being destroyed */ ++LOCAL void ++cl_event_delete_depslist(cl_event event) ++{ ++ CL_OBJECT_LOCK(event); ++ cl_event *old_depend_events = event->depend_events; ++ int depend_count = event->depend_event_num; ++ event->depend_event_num = 0; ++ event->depend_events = NULL; ++ CL_OBJECT_UNLOCK(event); ++ if (old_depend_events) { ++ assert(depend_count); ++ for (int i = 0; i < depend_count; i++) { ++ cl_event_delete(old_depend_events[i]); ++ } ++ cl_free(old_depend_events); ++ } ++} ++ + LOCAL void + cl_event_delete(cl_event event) + { +@@ -200,13 +219,7 @@ cl_event_delete(cl_event event) + + assert(list_node_out_of_list(&event->enqueue_node)); + +- if (event->depend_events) { +- assert(event->depend_event_num); +- for (i = 0; i < event->depend_event_num; i++) { +- cl_event_delete(event->depend_events[i]); +- } +- cl_free(event->depend_events); +- } ++ cl_event_delete_depslist(event); + + /* Free all the callbacks. Last ref, no need to lock. */ + while (!list_empty(&event->callbacks)) { +@@ -566,8 +579,12 @@ cl_event_exec(cl_event event, cl_int exe + assert(depend_status <= CL_COMPLETE || ignore_depends || exec_to_status == CL_QUEUED); + if (depend_status < CL_COMPLETE) { // Error happend, cancel exec. + ret = cl_event_set_status(event, depend_status); ++ cl_event_delete_depslist(event); + return depend_status; + } ++ if (depend_status == CL_COMPLETE) { // Avoid memory leak ++ cl_event_delete_depslist(event); ++ } + + if (cur_status <= exec_to_status) { + return ret; +--- a/src/cl_event.h ++++ b/src/cl_event.h +@@ -44,7 +44,7 @@ typedef struct _cl_event { + cl_command_type event_type; /* Event type. */ + cl_bool is_barrier; /* Is this event a barrier */ + cl_int status; /* The execution status */ +- cl_event *depend_events; /* The events must complete before this. */ ++ cl_event *depend_events; /* The events must complete before this. May disappear after they have completed - see cl_event_delete_depslist*/ + cl_uint depend_event_num; /* The depend events number. */ + list_head callbacks; /* The events The event callback functions */ + list_node enqueue_node; /* The node in the enqueue list. */ diff --git a/beignet-grammar.patch b/beignet-grammar.patch new file mode 100644 index 0000000..422e8aa --- /dev/null +++ b/beignet-grammar.patch @@ -0,0 +1,61 @@ +Description: Fix grammar in documentation + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2017-October/009179.html + +--- a/docs/Beignet/Backend.mdwn ++++ b/docs/Beignet/Backend.mdwn +@@ -9,10 +9,10 @@ Status + ------ + + After two years development, beignet is mature now. It now supports all the +-OpenCL 1.2 mandatory features. Beignet get almost 100% pass rate with both +-OpenCV 3.0 test suite and the piglit opencl test suite. There are some +-performance tuning related items remained, see [[here|Backend/TODO]] for a +-(incomplete) lists of things to do. ++OpenCL 1.2 mandatory features. Beignet gets almost 100% pass rate with both ++the OpenCV 3.0 test suite and the piglit opencl test suite. There are some ++performance tuning related items remained, see [[here|Backend/TODO]] for an ++(incomplete) list of things to do. + + Interface with the run-time + --------------------------- +@@ -61,7 +61,7 @@ Environment variables are used all over + - `OCL_OUTPUT_REG_ALLOC` `(0 or 1)`. Output Gen register allocations, including + virtual register to physical register mapping, live ranges. + +-- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there is any ++- `OCL_OUTPUT_BUILD_LOG` `(0 or 1)`. Output error messages if there are any + during CL kernel compiling and linking. + + - `OCL_OUTPUT_CFG` `(0 or 1)`. Output control flow graph in .dot file. +@@ -70,22 +70,22 @@ Environment variables are used all over + but without instructions in each BasicBlock. + + - `OCL_PRE_ALLOC_INSN_SCHEDULE` `(0 or 1)`. The instruction scheduler in +- beignet are currently splitted into two passes: before and after register +- allocation. The pre-alloc scheduler tend to decrease register pressure. ++ beignet is currently split into two passes: before and after register ++ allocation. The pre-alloc scheduler tends to decrease register pressure. + This variable is used to disable/enable pre-alloc scheduler. This pass is + disabled now for some bugs. + + - `OCL_POST_ALLOC_INSN_SCHEDULE` `(0 or 1)`. Disable/enable post-alloc +- instruction scheduler. The post-alloc scheduler tend to reduce instruction ++ instruction scheduler. The post-alloc scheduler tends to reduce instruction + latency. By default, this is enabled now. + +-- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how much registers can be +- spilled under SIMD16. Default value is 16. We find spill too much register +- under SIMD16 is not as good as fall back to SIMD8 mode. So we set the ++- `OCL_SIMD16_SPILL_THRESHOLD` `(0 to 256)`. Tune how many registers can be ++ spilled under SIMD16. Default value is 16. We find spilling too many registers ++ under SIMD16 is not as good as falling back to SIMD8 mode. So we set the + variable to control spilled register number under SIMD16. + + - `OCL_USE_PCH` `(0 or 1)`. The default value is 1. If it is enabled, we use +- a pre compiled header file which include all basic ocl headers. This would ++ a pre compiled header file which includes all basic ocl headers. This would + reduce the compile time. + + Implementation details diff --git a/beignet-in-order-queue.patch b/beignet-in-order-queue.patch new file mode 100644 index 0000000..78168fa --- /dev/null +++ b/beignet-in-order-queue.patch @@ -0,0 +1,381 @@ +Description: Make in-order command queues actually be in-order + +When beignet added out-of-order execution support (7fd45f15), +it made *all* queues use it, even ones that are nominally in-order. + +While using out-of-order queues is probably a good idea when possible +(for performance), the OpenCL spec does not allow it to be the default. + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009213.html + +--- a/src/cl_api.c ++++ b/src/cl_api.c +@@ -276,7 +276,7 @@ clEnqueueSVMFree (cl_command_queue command_queue, + data->size = num_svm_pointers; + data->ptr = user_data; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -422,7 +422,7 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue, + data->const_ptr = src_ptr; + data->size = size; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -434,6 +434,9 @@ cl_int clEnqueueSVMMemcpy (cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_copy) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while(0); + +@@ -511,7 +514,7 @@ cl_int clEnqueueSVMMemFill (cl_command_queue command_queue, + data->pattern_size = pattern_size; + data->size = size; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +--- a/src/cl_api_kernel.c ++++ b/src/cl_api_kernel.c +@@ -223,6 +223,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, + count *= global_wk_sz_rem[2] ? 2 : 1; + + const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem}; ++ cl_bool allow_immediate_submit = cl_command_queue_allow_bypass_submit(command_queue); + /* Go through the at most 8 cases and euque if there is work items left */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { +@@ -263,7 +264,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); ++ err = cl_event_exec(e, ((allow_immediate_submit && event_status == CL_COMPLETE) ? CL_SUBMITTED : CL_QUEUED), CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +--- a/src/cl_api_mem.c ++++ b/src/cl_api_mem.c +@@ -308,7 +308,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue, + if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) + data->write_map = 1; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -321,6 +321,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue, + } + + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_map) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + + ptr = data->ptr; +@@ -393,7 +396,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, + data->mem_obj = memobj; + data->ptr = mapped_ptr; + +- if (e_status == CL_COMPLETE) { // No need to wait ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { // No need to wait + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { + break; +@@ -495,7 +498,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue, + data->offset = offset; + data->size = size; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -507,6 +510,9 @@ clEnqueueReadBuffer(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_read) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -598,7 +604,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue, + data->offset = offset; + data->size = size; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -610,6 +616,9 @@ clEnqueueWriteBuffer(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_write) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -747,7 +756,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, + data->host_row_pitch = host_row_pitch; + data->host_slice_pitch = host_slice_pitch; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -759,6 +768,9 @@ clEnqueueReadBufferRect(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_read) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -898,7 +910,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue, + data->host_row_pitch = host_row_pitch; + data->host_slice_pitch = host_slice_pitch; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -910,6 +922,9 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_write) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -1017,7 +1032,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -1207,7 +1222,7 @@ clEnqueueCopyBufferRect(cl_command_queue command_queue, + if (e_status < CL_COMPLETE) { // Error happend, cancel. + err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; + break; +- } else if (e_status == CL_COMPLETE) { ++ } else if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE); + if (err != CL_SUCCESS) { + break; +@@ -1308,7 +1323,7 @@ clEnqueueFillBuffer(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -1395,7 +1410,7 @@ clEnqueueMigrateMemObjects(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -1574,7 +1589,7 @@ clEnqueueMapImage(cl_command_queue command_queue, + if (map_flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION)) + data->write_map = 1; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -1587,6 +1602,9 @@ clEnqueueMapImage(cl_command_queue command_queue, + } + + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_map) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + + ptr = data->ptr; +@@ -1764,7 +1782,7 @@ clEnqueueReadImage(cl_command_queue command_queue, + data->row_pitch = row_pitch; + data->slice_pitch = slice_pitch; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -1776,6 +1794,9 @@ clEnqueueReadImage(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_read) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -1916,7 +1937,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, + data->row_pitch = row_pitch; + data->slice_pitch = slice_pitch; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -1928,6 +1949,9 @@ clEnqueueWriteImage(cl_command_queue command_queue, + break; + } + cl_command_queue_enqueue_event(command_queue, e); ++ if (blocking_write) { ++ cl_event_wait_for_events_list(1, &e); ++ } + } + } while (0); + +@@ -2062,7 +2086,7 @@ clEnqueueCopyImage(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -2173,7 +2197,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -2285,7 +2309,7 @@ clEnqueueCopyBufferToImage(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +@@ -2395,7 +2419,7 @@ clEnqueueFillImage(cl_command_queue command_queue, + break; + } + +- err = cl_event_exec(e, e_status == CL_COMPLETE ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); ++ err = cl_event_exec(e, (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) ? CL_SUBMITTED : CL_QUEUED, CL_FALSE); + if (err != CL_SUCCESS) { + break; + } +--- a/src/cl_command_queue.h ++++ b/src/cl_command_queue.h +@@ -103,6 +103,11 @@ extern cl_int cl_command_queue_wait_finish(cl_command_queue queue); + extern cl_int cl_command_queue_wait_flush(cl_command_queue queue); + /* Note: Must call this function with queue's lock. */ + extern cl_event *cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_num); ++/* Whether it is valid to call cl_event_exec directly, instead of cl_command_queue_enqueue_event */ ++static inline cl_bool cl_command_queue_allow_bypass_submit(cl_command_queue queue){ ++ return (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)/* if out-of-order, always */ ++ || list_empty(&queue->worker.enqueued_events);/* if in-order, only if empty */ ++} + + #endif /* __CL_COMMAND_QUEUE_H__ */ + +--- a/src/cl_command_queue_enqueue.c ++++ b/src/cl_command_queue_enqueue.c +@@ -65,6 +65,8 @@ worker_thread_function(void *Arg) + if (cl_event_is_ready(e) <= CL_COMPLETE) { + list_node_del(&e->enqueue_node); + list_add_tail(&ready_list, &e->enqueue_node); ++ } else if(!(queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)){ ++ break; /* in in-order mode, can't skip over non-ready events */ + } + } + +@@ -80,18 +82,20 @@ worker_thread_function(void *Arg) + CL_OBJECT_UNLOCK(queue); + + /* Do the really job without lock.*/ +- exec_status = CL_SUBMITTED; +- list_for_each_safe(pos, n, &ready_list) +- { +- e = list_entry(pos, _cl_event, enqueue_node); +- cl_event_exec(e, exec_status, CL_FALSE); +- } ++ if (queue->props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* in in-order mode, need to get each all the way to CL_COMPLETE before starting the next one */ ++ exec_status = CL_SUBMITTED; ++ list_for_each_safe(pos, n, &ready_list) ++ { ++ e = list_entry(pos, _cl_event, enqueue_node); ++ cl_event_exec(e, exec_status, CL_FALSE); ++ } + +- /* Notify all waiting for flush. */ +- CL_OBJECT_LOCK(queue); +- worker->in_exec_status = CL_SUBMITTED; +- CL_OBJECT_NOTIFY_COND(queue); +- CL_OBJECT_UNLOCK(queue); ++ /* Notify all waiting for flush. */ ++ CL_OBJECT_LOCK(queue); ++ worker->in_exec_status = CL_SUBMITTED; ++ CL_OBJECT_NOTIFY_COND(queue); ++ CL_OBJECT_UNLOCK(queue); ++ } + + list_for_each_safe(pos, n, &ready_list) + { +--- a/src/cl_gl_api.c ++++ b/src/cl_gl_api.c +@@ -188,7 +188,7 @@ cl_int clEnqueueAcquireGLObjects (cl_command_queue command_queue, + data = &e->exec_data; + data->type = EnqueueReturnSuccesss; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { +@@ -274,7 +274,7 @@ cl_int clEnqueueReleaseGLObjects (cl_command_queue command_queue, + data = &e->exec_data; + data->type = EnqueueReturnSuccesss; + +- if (e_status == CL_COMPLETE) { ++ if (cl_command_queue_allow_bypass_submit(command_queue) && (e_status == CL_COMPLETE)) { + // Sync mode, no need to queue event. + err = cl_event_exec(e, CL_COMPLETE, CL_FALSE); + if (err != CL_SUCCESS) { diff --git a/beignet-llvm10-support.patch b/beignet-llvm10-support.patch new file mode 100644 index 0000000..7b51dd8 --- /dev/null +++ b/beignet-llvm10-support.patch @@ -0,0 +1,334 @@ +*Does not work - not in use* + +Description: Add LLVM 10 support + +Remove -std=c++0x, as LLVM 10 requires at least c++14 (the default) +Note that this triggers a gcc bug, so build with clang +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93299 + +BasicBlockPass no longer exists; as they suggest, replace it with +FunctionPass with a loop over BasicBlocks +https://github.com/llvm/llvm-project/commit/9f0ff0b2634bab6a5be8dace005c9eb24d386dd1#diff-bddbe5e4c647cb67298584000b67dea1 +Return true from IntrinsicLoweringPass as it can modify its input +(possibly a bug before?) + +setAlignment now takes a MaybeAlign not a uint + +Don't call initializeDominatorTreeWrapperPassPass and +initializeLoopInfoWrapperPassPass, as they no longer exist + +Add explicit template initialization to avoid an undefined symbol + +###does not work### +Pass clang libs as a single string to prevent them being converted +to -Wl,-Bstatic -lclang... -Wl,-Bdynamic, as that causes a +multiply defined options crash when run +(multiple dynamic LLVMs??) +builtin_acos_float()clang (LLVM option parsing): for the --pgo-warn-misexpect option: may only occur zero or one times! + + +#--- a/CMake/FindLLVM.cmake +#+++ b/CMake/FindLLVM.cmake +#@@ -120,11 +120,14 @@ macro(add_one_lib name) +# endif (LLVM_SYSTEM_LIBS_ORIG) +# endif (LLVM_VERSION_NODOT VERSION_GREATER 34) +# +#+#something harmless because whitespace at start is an error +#+set(CLANG_LIBRARIES "-ldl") +# macro(add_one_lib name) +# FIND_LIBRARY(CLANG_LIB +# NAMES ${name} +# PATHS ${LLVM_LIBRARY_DIR} NO_DEFAULT_PATH) +#- set(CLANG_LIBRARIES ${CLANG_LIBRARIES} ${CLANG_LIB}) +#+ set(CLANG_LIBRARIES "${CLANG_LIBRARIES} ${CLANG_LIB}") +#+ message(STATUS "clanglibs name ${name} this ${CLANG_LIB} all ${CLANG_LIBRARIES} ") +# unset(CLANG_LIB CACHE) +# endmacro() +# +#--- a/CMakeLists.txt +#+++ b/CMakeLists.txt +#@@ -79,7 +78,7 @@ elseif (COMPILER STREQUAL "CLANG") +# elseif (COMPILER STREQUAL "ICC") +# set (CMAKE_C_CXX_FLAGS "${CMAKE_C_CXX_FLAGS} -wd2928 -Wall -fPIC -fstrict-aliasing -fp-model fast -msse4.1 -Wl,-E") +# endif () +#-set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -std=c++0x -Wno-invalid-offsetof") +#+set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_C_CXX_FLAGS} -Wno-invalid-offsetof") +# set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_CXX_FLAGS}") +# set (CMAKE_CXX_FLAGS_DEBUG "-O0 -g -DGBE_DEBUG=1") +# set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DGBE_DEBUG=1") +--- beignet-1.3.2.orig/backend/src/backend/program.cpp ++++ beignet-1.3.2/backend/src/backend/program.cpp +@@ -695,8 +695,12 @@ namespace gbe { + ); + + clang::CompilerInvocation::CreateFromArgs(*CI, ++#if LLVM_VERSION_MAJOR < 10 + &args[0], + &args[0] + args.size(), ++#else ++ clang::ArrayRef(args), ++#endif + Diags); + // Create the compiler instance + clang::CompilerInstance Clang; +@@ -1248,8 +1252,12 @@ EXTEND_QUOTE: + // Create the compiler invocation + std::unique_ptr CI(new clang::CompilerInvocation); + return clang::CompilerInvocation::CreateFromArgs(*CI, ++#if LLVM_VERSION_MAJOR < 10 + &args[0], + &args[0] + args.size(), ++#else ++ clang::ArrayRef(args), ++#endif + Diags); + } + #endif +--- beignet-1.3.2.orig/backend/src/llvm/llvm_gen_backend.hpp ++++ beignet-1.3.2/backend/src/llvm/llvm_gen_backend.hpp +@@ -130,10 +130,10 @@ namespace gbe + llvm::FunctionPass *createGenPass(ir::Unit &unit); + + /*! Remove the GEP instructions */ +- llvm::BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit); ++ llvm::FunctionPass *createRemoveGEPPass(const ir::Unit &unit); + + /*! Merge load/store if possible */ +- llvm::BasicBlockPass *createLoadStoreOptimizationPass(); ++ llvm::FunctionPass *createLoadStoreOptimizationPass(); + + /*! Scalarize all vector op instructions */ + llvm::FunctionPass* createScalarizePass(); +@@ -141,7 +141,7 @@ namespace gbe + llvm::ModulePass* createBarrierNodupPass(bool); + + /*! Convert the Intrinsic call to gen function */ +- llvm::BasicBlockPass *createIntrinsicLoweringPass(); ++ llvm::FunctionPass *createIntrinsicLoweringPass(); + + /*! Passer the printf function call. */ + llvm::FunctionPass* createPrintfParserPass(ir::Unit &unit); +--- beignet-1.3.2.orig/backend/src/llvm/llvm_intrinsic_lowering.cpp ++++ beignet-1.3.2/backend/src/llvm/llvm_intrinsic_lowering.cpp +@@ -29,12 +29,12 @@ + using namespace llvm; + + namespace gbe { +- class InstrinsicLowering : public BasicBlockPass ++ class InstrinsicLowering : public FunctionPass + { + public: + static char ID; + InstrinsicLowering() : +- BasicBlockPass(ID) {} ++ FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + +@@ -93,9 +93,9 @@ namespace gbe { + CI->eraseFromParent(); + return NewCI; + } +- virtual bool runOnBasicBlock(BasicBlock &BB) ++ virtual bool runOnFunction(Function &F) + { +- bool changedBlock = false; ++ for (BasicBlock &BB : F) { + Module *M = BB.getParent()->getParent(); + + DataLayout TD(M); +@@ -159,13 +159,14 @@ namespace gbe { + } + } + } +- return changedBlock; ++ } ++ return true; + } + }; + + char InstrinsicLowering::ID = 0; + +- BasicBlockPass *createIntrinsicLoweringPass() { ++ FunctionPass *createIntrinsicLoweringPass() { + return new InstrinsicLowering(); + } + } // end namespace +--- beignet-1.3.2.orig/backend/src/llvm/llvm_loadstore_optimization.cpp ++++ beignet-1.3.2/backend/src/llvm/llvm_loadstore_optimization.cpp +@@ -26,13 +26,13 @@ + + using namespace llvm; + namespace gbe { +- class GenLoadStoreOptimization : public BasicBlockPass { ++ class GenLoadStoreOptimization : public FunctionPass { + + public: + static char ID; + ScalarEvolution *SE; + const DataLayout *TD; +- GenLoadStoreOptimization() : BasicBlockPass(ID) {} ++ GenLoadStoreOptimization() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38 +@@ -45,7 +45,9 @@ namespace gbe { + AU.setPreservesCFG(); + } + +- virtual bool runOnBasicBlock(BasicBlock &BB) { ++ virtual bool runOnFunction(Function &F) { ++ bool changedAnyBlock = false; ++ for (BasicBlock &BB : F) { + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38 + SE = &getAnalysis().getSE(); + #else +@@ -59,7 +61,9 @@ namespace gbe { + #else + TD = getAnalysisIfAvailable(); + #endif +- return optimizeLoadStore(BB); ++ changedAnyBlock = optimizeLoadStore(BB) | changedAnyBlock; ++ } ++ return changedAnyBlock; + } + Type *getValueType(Value *insn); + Value *getPointerOperand(Value *I); +@@ -148,7 +152,11 @@ namespace gbe { + values.push_back(merged[i]); + } + LoadInst *ld = cast(merged[0]); ++#if LLVM_VERSION_MAJOR < 10 + unsigned align = ld->getAlignment(); ++#else ++ MaybeAlign align = ld->getAlign(); ++#endif + unsigned addrSpace = ld->getPointerAddressSpace(); + // insert before first load + Builder.SetInsertPoint(ld); +@@ -231,7 +239,11 @@ namespace gbe { + + unsigned addrSpace = st->getPointerAddressSpace(); + ++#if LLVM_VERSION_MAJOR < 10 + unsigned align = st->getAlignment(); ++#else ++ MaybeAlign align = st->getAlign(); ++#endif + // insert before the last store + Builder.SetInsertPoint(merged[size-1]); + +@@ -325,7 +337,7 @@ namespace gbe { + return changed; + } + +- BasicBlockPass *createLoadStoreOptimizationPass() { ++ FunctionPass *createLoadStoreOptimizationPass() { + return new GenLoadStoreOptimization(); + } + }; +--- beignet-1.3.2.orig/backend/src/llvm/llvm_passes.cpp ++++ beignet-1.3.2/backend/src/llvm/llvm_passes.cpp +@@ -37,7 +37,7 @@ + #include "sys/map.hpp" + + using namespace llvm; +- ++template class cfg::Update; + namespace gbe + { + bool isKernelFunction(const llvm::Function &F) { +@@ -219,13 +219,13 @@ namespace gbe + return offset; + } + +- class GenRemoveGEPPasss : public BasicBlockPass ++ class GenRemoveGEPPasss : public FunctionPass + { + + public: + static char ID; + GenRemoveGEPPasss(const ir::Unit &unit) : +- BasicBlockPass(ID), ++ FunctionPass(ID), + unit(unit) {} + const ir::Unit &unit; + void getAnalysisUsage(AnalysisUsage &AU) const { +@@ -242,16 +242,18 @@ namespace gbe + + bool simplifyGEPInstructions(GetElementPtrInst* GEPInst); + +- virtual bool runOnBasicBlock(BasicBlock &BB) ++ virtual bool runOnFunction(Function &F) + { +- bool changedBlock = false; ++ bool changedAnyBlock = false; ++ for (BasicBlock &BB : F) { + iplist::iterator I = BB.getInstList().begin(); + for (auto nextI = I, E = --BB.getInstList().end(); I != E; I = nextI) { + iplist::iterator I = nextI++; + if(GetElementPtrInst* gep = dyn_cast(&*I)) +- changedBlock = (simplifyGEPInstructions(gep) || changedBlock); ++ changedAnyBlock = (simplifyGEPInstructions(gep) | changedAnyBlock); + } +- return changedBlock; ++ } ++ return changedAnyBlock; + } + }; + +@@ -367,7 +369,7 @@ namespace gbe + return true; + } + +- BasicBlockPass *createRemoveGEPPass(const ir::Unit &unit) { ++ FunctionPass *createRemoveGEPPass(const ir::Unit &unit) { + return new GenRemoveGEPPasss(unit); + } + } /* namespace gbe */ +--- beignet-1.3.2.orig/backend/src/llvm/llvm_sampler_fix.cpp ++++ beignet-1.3.2/backend/src/llvm/llvm_sampler_fix.cpp +@@ -33,11 +33,13 @@ namespace gbe { + class SamplerFix : public FunctionPass { + public: + SamplerFix() : FunctionPass(ID) { ++#if LLVM_VERSION_MAJOR < 10 + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 35 + initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + #else + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); + #endif ++#endif + } + + bool visitCallInst(CallInst *I) { +--- a/backend/src/llvm/llvm_gen_backend.cpp ++++ b/backend/src/llvm/llvm_gen_backend.cpp +@@ -575,10 +575,12 @@ namespace gbe + has_errors(false), + legacyMode(true) + { ++#if LLVM_VERSION_MAJOR < 10 + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37 + initializeLoopInfoWrapperPassPass(*PassRegistry::getPassRegistry()); + #else + initializeLoopInfoPass(*PassRegistry::getPassRegistry()); ++#endif + #endif + pass = PASS_EMIT_REGISTERS; + } +--- a/backend/src/llvm/llvm_scalarize.cpp ++++ b/backend/src/llvm/llvm_scalarize.cpp +@@ -96,10 +96,12 @@ namespace gbe { + + Scalarize() : FunctionPass(ID) + { ++#if LLVM_VERSION_MAJOR < 10 + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 35 + initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); + #else + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); ++#endif + #endif + } + diff --git a/beignet-llvm6-support.patch b/beignet-llvm6-support.patch new file mode 100644 index 0000000..07e89f7 --- /dev/null +++ b/beignet-llvm6-support.patch @@ -0,0 +1,37 @@ +Description: Support LLVM 6 + +LLVMContext::setDiagnosticHandler is renamed +LoopInfo::markAsRemoved is partly replaced by LoopInfo::erase, +but that doesn't remove the loop from the queue + +Author: Rebecca N. Palmer +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009211.html (original), https://lists.freedesktop.org/archives/beignet/2019-January/009222.html (#913141 fix) + +--- a/backend/src/llvm/llvm_to_gen.cpp ++++ b/backend/src/llvm/llvm_to_gen.cpp +@@ -322,7 +322,11 @@ namespace gbe + DataLayout DL(&mod); + + gbeDiagnosticContext dc; ++#if LLVM_VERSION_MAJOR >= 6 ++ mod.getContext().setDiagnosticHandlerCallBack(&gbeDiagnosticHandler,&dc); ++#else + mod.getContext().setDiagnosticHandler(&gbeDiagnosticHandler,&dc); ++#endif + + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37 + mod.setDataLayout(DL); +--- a/backend/src/llvm/llvm_unroll.cpp ++++ b/backend/src/llvm/llvm_unroll.cpp +@@ -205,7 +205,10 @@ namespace gbe { + if (parentTripCount != 0 && currTripCount * parentTripCount > 32) { + //Don't change the unrollID if doesn't force unroll. + //setUnrollID(parentL, false); +-#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38 ++#if LLVM_VERSION_MAJOR >= 6 ++ LPM.markLoopAsDeleted(*parentL); ++ loopInfo.erase(parentL); ++#elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 38 + loopInfo.markAsRemoved(parentL); + #else + LPM.deleteLoopFromQueue(parentL); diff --git a/beignet-llvm7-support.patch b/beignet-llvm7-support.patch new file mode 100644 index 0000000..9d460ce --- /dev/null +++ b/beignet-llvm7-support.patch @@ -0,0 +1,106 @@ +Description: Add LLVM 7 support + +1.Change linking order, as clangCodeGen now links to clangFrontend +2.Pass references not pointers to WriteBitcodeToFile and CloneModule +3.Add the headers that LoopSimplifyID, LCSSAID and +some create*Pass have moved to +4.Define our DEBUG whether or not we just undefined LLVM's +(theirs is now LLVM_DEBUG, but we never actually use it) + +Author: Rebecca N. Palmer +Bug-Debian: https://bugs.debian.org/912787 +Forwarded: https://lists.freedesktop.org/archives/beignet/2018-July/009212.html + +--- a/CMake/FindLLVM.cmake ++++ b/CMake/FindLLVM.cmake +@@ -113,10 +113,10 @@ macro(add_one_lib name) + endmacro() + + #Assume clang lib path same as llvm lib path ++add_one_lib("clangCodeGen") + add_one_lib("clangFrontend") + add_one_lib("clangSerialization") + add_one_lib("clangDriver") +-add_one_lib("clangCodeGen") + add_one_lib("clangSema") + add_one_lib("clangStaticAnalyzerFrontend") + add_one_lib("clangStaticAnalyzerCheckers") +--- a/backend/src/backend/gen_program.cpp ++++ b/backend/src/backend/gen_program.cpp +@@ -449,7 +449,11 @@ namespace gbe { + #ifdef GBE_COMPILER_AVAILABLE + std::string str; + llvm::raw_string_ostream OS(str); ++#if LLVM_VERSION_MAJOR >= 7 ++ llvm::WriteBitcodeToFile(*((llvm::Module*)prog->module), OS); ++#else + llvm::WriteBitcodeToFile((llvm::Module*)prog->module, OS); ++#endif + std::string& bin_str = OS.str(); + int llsz = bin_str.size(); + *binary = (char *)malloc(sizeof(char) * (llsz+1) ); +@@ -540,7 +544,11 @@ namespace gbe { + &modRef); + src = llvm::unwrap(modRef); + } ++#if LLVM_VERSION_MAJOR >= 7 ++ llvm::Module* clone = llvm::CloneModule(*src).release(); ++#else + llvm::Module* clone = llvm::CloneModule(src).release(); ++#endif + if (LLVMLinkModules2(wrap(dst), wrap(clone))) { + #elif LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 37 + if (LLVMLinkModules(wrap(dst), wrap(src), LLVMLinkerPreserveSource_Removed, &errMsg)) { +--- a/backend/src/backend/program.cpp ++++ b/backend/src/backend/program.cpp +@@ -794,7 +794,11 @@ namespace gbe { + llvm::raw_fd_ostream ostream (dumpSPIRBinaryName.c_str(), + err, llvm::sys::fs::F_None); + if (!err) ++#if LLVM_VERSION_MAJOR<7 + llvm::WriteBitcodeToFile(*out_module, ostream); ++#else ++ llvm::WriteBitcodeToFile(**out_module, ostream); ++#endif + } + #endif + return true; +--- a/backend/src/llvm/llvm_bitcode_link.cpp ++++ b/backend/src/llvm/llvm_bitcode_link.cpp +@@ -340,7 +340,11 @@ namespace gbe + /* We use beignet's bitcode as dst because it will have a lot of + lazy functions which will not be loaded. */ + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39 ++#if LLVM_VERSION_MAJOR >= 7 ++ llvm::Module * linked_module = llvm::CloneModule(*(llvm::Module*)mod).release(); ++#else + llvm::Module * linked_module = llvm::CloneModule((llvm::Module*)mod).release(); ++#endif + if(LLVMLinkModules2(wrap(clonedLib), wrap(linked_module))) { + #else + char* errorMsg; +--- a/backend/src/llvm/llvm_includes.hpp ++++ b/backend/src/llvm/llvm_includes.hpp +@@ -89,6 +89,10 @@ + #include "llvm/CodeGen/IntrinsicLowering.h" + + #include "llvm/Transforms/Scalar.h" ++#if LLVM_VERSION_MAJOR >= 7 ++#include "llvm/Transforms/Utils.h" ++#include "llvm/Transforms/InstCombine/InstCombine.h" ++#endif + #include "llvm/MC/MCAsmInfo.h" + #include "llvm/MC/MCContext.h" + #include "llvm/MC/MCInstrInfo.h" +--- a/backend/src/llvm/ExpandLargeIntegers.cpp ++++ b/backend/src/llvm/ExpandLargeIntegers.cpp +@@ -99,8 +99,8 @@ using namespace llvm; + + #ifdef DEBUG + #undef DEBUG +- #define DEBUG(...) + #endif ++#define DEBUG(...) + // Break instructions up into no larger than 64-bit chunks. + static const unsigned kChunkBits = 64; + static const unsigned kChunkBytes = kChunkBits / CHAR_BIT; diff --git a/beignet-llvm8-support.patch b/beignet-llvm8-support.patch new file mode 100644 index 0000000..b17a180 --- /dev/null +++ b/beignet-llvm8-support.patch @@ -0,0 +1,71 @@ +Description: Fix build with LLVM/Clang 8 + +Origin: (partly) FreeBSD https://svnweb.freebsd.org/ports/head/lang/beignet/files/patch-llvm8?view=markup +Author: Jan Beich, Rebecca N. Palmer + +--- a/backend/src/CMakeLists.txt ++++ b/backend/src/CMakeLists.txt +@@ -168,6 +168,7 @@ add_dependencies(gbe beignet_bitcode) + endif (NOT (USE_STANDALONE_GBE_COMPILER STREQUAL "true")) + + add_library(gbeinterp SHARED gbe_bin_interpreter.cpp) ++target_link_libraries(gbeinterp ${LLVM_MODULE_LIBS} ${LLVM_SYSTEM_LIBS}) + + if (LLVM_VERSION_NODOT VERSION_EQUAL 34) + find_library(TERMINFO NAMES tinfo ncurses) +--- a/backend/src/llvm/llvm_gen_backend.cpp ++++ b/backend/src/llvm/llvm_gen_backend.cpp +@@ -3073,14 +3073,22 @@ namespace gbe + + + static unsigned getChildNo(BasicBlock *bb) { ++#if LLVM_VERSION_MAJOR < 8 + TerminatorInst *term = bb->getTerminator(); ++#else ++ Instruction *term = bb->getTerminator(); ++#endif + return term->getNumSuccessors(); + } + + // return NULL if index out-range of children number + static BasicBlock *getChildPossible(BasicBlock *bb, unsigned index) { + ++#if LLVM_VERSION_MAJOR < 8 + TerminatorInst *term = bb->getTerminator(); ++#else ++ Instruction *term = bb->getTerminator(); ++#endif + unsigned childNo = term->getNumSuccessors(); + BasicBlock *child = NULL; + if(index < childNo) { +--- a/backend/src/backend/gen_register.hpp ++++ b/backend/src/backend/gen_register.hpp +@@ -225,6 +225,7 @@ namespace gbe + uint32_t width, + uint32_t hstride) + { ++ this->value.reg = 0;//avoid subgroup crash + this->type = type; + this->file = file; + this->nr = nr; +--- a/backend/src/libocl/tmpl/ocl_integer.tmpl.cl ++++ b/backend/src/libocl/tmpl/ocl_integer.tmpl.cl +@@ -216,13 +216,14 @@ OVERLOADABLE ulong mad_sat(ulong a, ulon + return __gen_ocl_mad_sat(a, b, c); + } + +-OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { return (x << y) | (x >> (8 - y)); } ++// the 'volatile' is to make the LLVM optimizer leave these alone, as it would convert them to intrinsics (fshl/fshr) that we don't implement ++OVERLOADABLE uchar __rotate_left(uchar x, uchar y) { volatile uchar z; z = (x << y); return z | (x >> (8 - y)); } + OVERLOADABLE char __rotate_left(char x, char y) { return __rotate_left((uchar)x, (uchar)y); } +-OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { return (x << y) | (x >> (16 - y)); } ++OVERLOADABLE ushort __rotate_left(ushort x, ushort y) { volatile ushort z; z = (x << y); return z | (x >> (16 - y)); } + OVERLOADABLE short __rotate_left(short x, short y) { return __rotate_left((ushort)x, (ushort)y); } +-OVERLOADABLE uint __rotate_left(uint x, uint y) { return (x << y) | (x >> (32 - y)); } ++OVERLOADABLE uint __rotate_left(uint x, uint y) { volatile uint z; z = (x << y); return z | (x >> (32 - y)); } + OVERLOADABLE int __rotate_left(int x, int y) { return __rotate_left((uint)x, (uint)y); } +-OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { return (x << y) | (x >> (64 - y)); } ++OVERLOADABLE ulong __rotate_left(ulong x, ulong y) { volatile ulong z; z = (x << y); return z | (x >> (64 - y)); } + OVERLOADABLE long __rotate_left(long x, long y) { return __rotate_left((ulong)x, (ulong)y); } + #define DEF(type, m) OVERLOADABLE type rotate(type x, type y) { return __rotate_left(x, (type)(y & m)); } + DEF(char, 7) diff --git a/beignet-llvm9-support.patch b/beignet-llvm9-support.patch new file mode 100644 index 0000000..5bcb516 --- /dev/null +++ b/beignet-llvm9-support.patch @@ -0,0 +1,113 @@ +Description: Fix build with LLVM/Clang 9 + +Origin: (mostly) FreeBSD https://svnweb.freebsd.org/ports/head/lang/beignet/files/patch-llvm9?view=markup +Author: Jan Beich, Rebecca N. Palmer + +--- a/CMake/FindLLVM.cmake ++++ b/CMake/FindLLVM.cmake +@@ -126,6 +126,9 @@ macro(add_one_lib name) + add_one_lib("clangStaticAnalyzerCore") + add_one_lib("clangAnalysis") + add_one_lib("clangEdit") ++if (LLVM_VERSION_NODOT VERSION_GREATER 80) ++add_one_lib("clangASTMatchers") ++endif (LLVM_VERSION_NODOT VERSION_GREATER 80) + add_one_lib("clangAST") + add_one_lib("clangParse") + add_one_lib("clangSema") +--- a/backend/src/llvm/llvm_intrinsic_lowering.cpp ++++ b/backend/src/llvm/llvm_intrinsic_lowering.cpp +@@ -77,7 +77,11 @@ namespace gbe { + std::vector ParamTys; + for (Value** I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ FunctionCallee FCache = M->getOrInsertFunction(NewFn, ++#else + Constant* FCache = M->getOrInsertFunction(NewFn, ++#endif + FunctionType::get(RetTy, ParamTys, false)); + + IRBuilder<> Builder(CI->getParent(), BasicBlock::iterator(CI)); +--- a/backend/src/llvm/llvm_sampler_fix.cpp ++++ b/backend/src/llvm/llvm_sampler_fix.cpp +@@ -82,7 +82,11 @@ namespace gbe { + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40 + Module *M = I->getParent()->getParent()->getParent(); + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50 ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ FunctionCallee samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType()); ++#else + Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType()); ++#endif + #else + Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr); + #endif +@@ -124,7 +128,11 @@ namespace gbe { + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40 + Module *M = I->getParent()->getParent()->getParent(); + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50 ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ FunctionCallee samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType()); ++#else + Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType()); ++#endif + #else + Value* samplerCvt = M->getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I->getOperand(0)->getType(), nullptr); + #endif +--- a/backend/src/llvm/llvm_profiling.cpp ++++ b/backend/src/llvm/llvm_profiling.cpp +@@ -163,10 +163,18 @@ namespace gbe + // __gen_ocl_store_timestamp(int nth, int type); + Value *Args[2] = {ConstantInt::get(intTy, pointNum++), ConstantInt::get(intTy, profilingType)}; + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50 ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ builder->CreateCall(module->getOrInsertFunction( ++#else + builder->CreateCall(cast(module->getOrInsertFunction( ++#endif + "__gen_ocl_calc_timestamp", Type::getVoidTy(module->getContext()), + IntegerType::getInt32Ty(module->getContext()), ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ IntegerType::getInt32Ty(module->getContext())), ++#else + IntegerType::getInt32Ty(module->getContext()))), ++#endif + ArrayRef(Args)); + #else + builder->CreateCall(cast(module->getOrInsertFunction( +@@ -185,10 +193,18 @@ namespace gbe + Value *Args2[2] = {profilingBuf, ConstantInt::get(intTy, profilingType)}; + + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 50 ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ builder->CreateCall(module->getOrInsertFunction( ++#else + builder->CreateCall(cast(module->getOrInsertFunction( ++#endif + "__gen_ocl_store_profiling", Type::getVoidTy(module->getContext()), + ptrTy, ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ IntegerType::getInt32Ty(module->getContext())), ++#else + IntegerType::getInt32Ty(module->getContext()))), ++#endif + ArrayRef(Args2)); + #else + builder->CreateCall(cast(module->getOrInsertFunction( +--- a/backend/src/llvm/llvm_device_enqueue.cpp ++++ b/backend/src/llvm/llvm_device_enqueue.cpp +@@ -398,8 +398,13 @@ namespace gbe { + std::vector ParamTys; + for (Value** iter = args.begin(); iter != args.end(); ++iter) + ParamTys.push_back((*iter)->getType()); ++#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 90 ++ CallInst* newCI = builder.CreateCall(mod->getOrInsertFunction( ++ "__gen_enqueue_kernel_slm", FunctionType::get(intTy, ParamTys, false)), args); ++#else + CallInst* newCI = builder.CreateCall(cast(mod->getOrInsertFunction( + "__gen_enqueue_kernel_slm", FunctionType::get(intTy, ParamTys, false))), args); ++#endif + CI->replaceAllUsesWith(newCI); + deadInsnSet.insert(CI); + } diff --git a/beignet-reduce-notfound-output.patch b/beignet-reduce-notfound-output.patch new file mode 100644 index 0000000..93924c9 --- /dev/null +++ b/beignet-reduce-notfound-output.patch @@ -0,0 +1,41 @@ +Description: Reduce error spew on unsupported hardware + +Loading beignet on unsupported hardware produces ~20 errors, even +if another ICD is also installed that does support the hardware. +Replace these with one message that explicitly says what to do. + +Author: Rebecca N. Palmer + +--- a/src/cl_device_id.c ++++ b/src/cl_device_id.c +@@ -797,10 +797,10 @@ glk12eu_break: + case PCI_CHIP_SANDYBRIDGE_BRIDGE_S: + case PCI_CHIP_SANDYBRIDGE_S_GT: + // Intel(R) HD Graphics SandyBridge not supported yet ++ default: ++ fprintf(stderr, "beignet-opencl-icd: no supported GPU found, this is probably the wrong opencl-icd package for this hardware\n(If you have multiple ICDs installed and OpenCL works, you can ignore this message)\n"); + ret = NULL; + break; +- default: +- printf("cl_get_gt_device(): error, unknown device: %x\n", device_id); + } + + if (ret == NULL) +--- a/src/intel/intel_driver.c ++++ b/src/intel/intel_driver.c +@@ -263,7 +263,6 @@ if(!intel_driver_is_active(intel)) { + } + + if(!intel_driver_is_active(intel)) { +- fprintf(stderr, "Device open failed, aborting...\n"); + return CL_DEVICE_NOT_FOUND; + } + +@@ -324,7 +323,6 @@ drm_client_t client; + // usually dev_name = "/dev/dri/card%d" + dev_fd = open(dev_name, O_RDWR); + if (dev_fd == -1) { +- fprintf(stderr, "open(\"%s\", O_RDWR) failed: %s\n", dev_name, strerror(errno)); + return 0; + } + diff --git a/beignet-reduce-notfound-output2.patch b/beignet-reduce-notfound-output2.patch new file mode 100644 index 0000000..eb63b91 --- /dev/null +++ b/beignet-reduce-notfound-output2.patch @@ -0,0 +1,61 @@ +Description: Reduce error spew on unsupported or hybrid hardware + +Explicitly check if the device is i915 before calling random ioctl()s +to avoid triggering pointless user-visible error messages if it is not. + +Origin: upstream b70d65ba25a32a965cc122bf944ba14a1aa0a095 +Author: Mark Thompson + +--- a/src/intel/intel_driver.c ++++ b/src/intel/intel_driver.c +@@ -312,6 +312,26 @@ return ret; + } + #endif + ++static int ++intel_driver_check_device(int dev_fd) ++{ ++ // Ensure that this is actually an i915 DRM device. ++ drmVersion *version; ++ int ret; ++ version = drmGetVersion(dev_fd); ++ if (!version) { ++ fprintf(stderr, "drmGetVersion(%d) failed: %s\n", dev_fd, strerror(errno)); ++ close(dev_fd); ++ return 0; ++ } ++ ret = !strcmp(version->name, "i915"); ++ drmFreeVersion(version); ++ // Don't print an error here if this device is using a different driver, ++ // because we might be iterating over multiple devices looking for a ++ // compatible one. ++ return ret; ++} ++ + LOCAL int + intel_driver_init_master(intel_driver_t *driver, const char* dev_name) + { +@@ -326,6 +346,11 @@ if (dev_fd == -1) { + return 0; + } + ++if (!intel_driver_check_device(dev_fd)) { ++ close(dev_fd); ++ return 0; ++} ++ + // Check that we're authenticated + memset(&client, 0, sizeof(drm_client_t)); + ret = ioctl(dev_fd, DRM_IOCTL_GET_CLIENT, &client); +@@ -356,6 +381,11 @@ dev_fd = open(dev_name, O_RDWR); + if (dev_fd == -1) + return 0; + ++if (!intel_driver_check_device(dev_fd)) { ++ close(dev_fd); ++ return 0; ++} ++ + ret = intel_driver_init(driver, dev_fd); + driver->need_close = 1; + diff --git a/beignet-update-docs.patch b/beignet-update-docs.patch new file mode 100644 index 0000000..735bea2 --- /dev/null +++ b/beignet-update-docs.patch @@ -0,0 +1,161 @@ +Description: Update documentation + +Reflect #767148 fix, high precision now being default, and releases. +Clarify what hardware is supported. +Add Debian specific information; recommend the Debian BTS rather than +mostly-inactive upstream contact points. + +Author: Rebecca N. Palmer +Forwarded: partially, most recently https://lists.freedesktop.org/archives/beignet/2019-January/009225.html + +--- a/src/cl_device_id.c ++++ b/src/cl_device_id.c +@@ -907,7 +907,7 @@ cl_self_test(cl_device_id device, cl_sel + } else { + ret = SELF_TEST_SLM_FAIL; + printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n" +- "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", ++ "This can usually be fixed by upgrading Linux to >= 4.2,\nsee /usr/share/doc/beignet-dev/Beignet.html or https://www.freedesktop.org/wiki/Software/Beignet/\n", + test_data[0], test_data[1], test_data[2]); + + } +--- a/docs/Beignet/Backend.mdwn ++++ b/docs/Beignet/Backend.mdwn +@@ -37,9 +37,7 @@ Environment variables are used all over + precision math instructions compliant with OpenCL Spec. So we provide a + software version to meet the high precision requirement. Obviously the + software version's performance is not as good as native version supported by +- GEN hardware. What's more, most graphics application don't need this high +- precision, so we choose 0 as the default value. So OpenCL apps do not suffer +- the performance penalty for using high precision math functions. ++ GEN hardware. + + - `OCL_SIMD_WIDTH` `(8 or 16)`. Select the number of lanes per hardware thread, + Normally, you don't need to set it, we will select suitable simd width for +--- /dev/null ++++ b/docs/index.mdwn +@@ -0,0 +1 @@ ++[[!map pages="* and !ikiwiki/*" show=title]] +--- a/docs/Beignet.mdwn ++++ b/docs/Beignet.mdwn +@@ -16,6 +16,8 @@ News + Prerequisite + ------------ + ++(for building the upstream source; Debian packages handle this automatically) ++ + The project depends on the following external libraries: + + - libdrm libraries (libdrm and libdrm\_intel) +@@ -84,7 +86,7 @@ you need to configure it as below: + + CMake will check the dependencies and will complain if it does not find them. + +-`> make` ++`> make utest` + + The cmake will build the backend firstly. Please refer to: + [[OpenCL Gen Backend|Beignet/Backend]] to get more dependencies. +@@ -123,25 +125,17 @@ platform. Beignet also produces various + consistency. This small test framework uses a simple c++ registration system to + register all the unit tests. + +-You need to call setenv.sh in the utests/ directory to set some environment variables +-firstly as below: +- +-`> . setenv.sh` +- +-Then in `utests/`: ++In Debian beignet, the testing tool is in the _beignet-dev_ package, and is run with: + +-`> ./utest_run` ++`> /usr/lib/\`dpkg-architecture -qDEB_HOST_MULTIARCH\`/beignet/utest_run` + +-will run all the unit tests one after the others ++(this name and path may change in future releases - please do not rely on it). It will test the first OpenCL GPU device it finds (which does _not_ have to be a Beignet device). If you want to test a different device, uninstall all other ICDs. To see more options, pass `-h`. + +-`> ./utest_run some_unit_test` ++If you compiled Beignet yourself, you will find this tool in `utests`, and will need to set some environment variables to use it (see `setenv.sh`). + +-will only run `some_unit_test` test. + + On all supported target platform, the pass rate should be 100%. If it is not, you may +-need to refer the "Known Issues" section. Please be noted, the `. setenv.sh` is only +-required to run unit test cases. For all other OpenCL applications, don't execute that +-command. ++need to refer the "Known Issues" section. + + Normally, beignet needs to run under X server environment as normal user. If there isn't X server, + beignet provides two alternative to run: +@@ -151,14 +145,23 @@ beignet provides two alternative to run: + Supported Targets + ----------------- + ++Beignet aims to support Gen7 to Gen9 Intel HD (not PowerVR) integrated GPUs. It is the only OpenCL in Debian for the GPUs of: ++ + * 3rd Generation Intel Core Processors "Ivybridge". + * 3rd Generation Intel Atom Processors "BayTrail". + * 4th Generation Intel Core Processors "Haswell", need kernel patch if your linux kernel older than 4.2, see the "Known Issues" section. ++ ++Beignet should also work on the following, but if CL-GL sharing is not required, [Intel Compute Runtime (Neo)](https://01.org/compute-runtime) (Debian package [intel-opencl-icd](https://packages.debian.org/search?keywords=intel-opencl-icd&searchon=names&exact=1&suite=all§ion=all)) may be better: ++ + * 5th Generation Intel Core Processors "Broadwell". + * 5th Generation Intel Atom Processors "Braswell". +- * 6th Generation Intel Core Processors "Skylake" and "Kabylake". + * 5th Generation Intel Atom Processors "Broxten" or "Apollolake". ++ * 6th+ Generation Intel Core Processors "Skylake", "Kaby Lake", "Coffee Lake", "Comet Lake", "Amber Lake". + ++Not all processor models have an integrated GPU (Beignet does _not_ support running OpenCL on the CPU itself - see [pocl-opencl-icd](https://packages.debian.org/search?keywords=pocl-opencl-icd&searchon=names&exact=1&suite=all§ion=all) for that). Systems that also have a discrete GPU may disable the integrated GPU: check _xrandr --listproviders_ or see [here](https://nouveau.freedesktop.org/wiki/Optimus/). ++ ++Attempting to run Beignet on unsupported hardware should return CL_DEVICE_NOT_FOUND; if it does anything else (especially crashing) please [report a bug](#howtocontribute). ++ + OpenCL 2.0 + ---------- + From release v1.3.0, beignet supports OpenCL 2.0 on Skylake and later hardware. +@@ -188,7 +191,7 @@ Known Issues + forever until a reboot. + + * "Beignet: self-test failed" and almost all unit tests fail. +- Linux 3.15 and 3.16 (commits [f0a346b](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f0a346bdafaf6fc4a51df9ddf1548fd888f860d8) ++ Linux 3.15 and upstream 3.16 (_not_ Debian jessie 3.16; commits [f0a346b](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f0a346bdafaf6fc4a51df9ddf1548fd888f860d8) + to [c9224fa](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c9224faa59c3071ecfa2d4b24592f4eb61e57069)) + enable the register whitelist by default but miss some registers needed + for Beignet. +@@ -224,10 +227,15 @@ Known Issues + This extension is partially implemented(the most commonly used part), and we will implement + other parts based on requirement. + ++* Programs using Intel-specific extensions fail to compile. ++ ++ Debian opencl-c-headers is the standard (Khronos) headers, which splits some of these into separate files ++ (e.g. cl\_ext\_intel.h): search /usr/include/CL. cl\_intel.h is in the beignet-dev package. ++ + Project repository + ------------------ + Right now, we host our project on fdo at: +-[http://cgit.freedesktop.org/beignet/](http://cgit.freedesktop.org/beignet/). ++[https://gitlab.freedesktop.org/beignet/beignet](https://gitlab.freedesktop.org/beignet/beignet). + And the Intel 01.org: + [https://01.org/beignet](https://01.org/beignet) + +@@ -263,15 +271,11 @@ If I missed any other package maintainer + + How to contribute + ----------------- +-You are always welcome to contribute to this project, just need to subscribe +-to the beignet mail list and send patches to it for review. +-The official mail list is as below: +-[http://lists.freedesktop.org/mailman/listinfo/beignet](http://lists.freedesktop.org/mailman/listinfo/beignet) +-The official bugzilla is at: +-[https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet](https://bugs.freedesktop.org/enter_bug.cgi?product=Beignet) +-You are welcome to submit beignet bug. Please be noted, please specify the exact platform +-information, such as BYT/IVB/HSW/BDW, and GT1/GT2/GT3. You can easily get this information +-by running the beignet's unit test. ++Please [report bugs to Debian](https://www.debian.org/Bugs/Reporting) package beignet-opencl-icd. ++ ++Please specify your hardware when reporting a bug: _reportbug beignet-opencl-icd_ will automatically include this information. ++ ++The upstream [email list](http://lists.freedesktop.org/mailman/listinfo/beignet) and [bug tracker](https://gitlab.freedesktop.org/beignet/beignet/issues) are still available, but as upstream is mostly inactive, it may be some time before they respond. + + Documents for OpenCL application developers + ------------------------------------------- diff --git a/beignet.spec b/beignet.spec index c62d919..b486659 100644 --- a/beignet.spec +++ b/beignet.spec @@ -10,6 +10,28 @@ Source0: https://01.org/sites/default/files/beignet-%{version}-source.tar.gz # Source0-md5: a577ab18d67a891c8767b8ea62253543 Patch0: cflags.patch Patch1: static_llvm.patch +# Debian patches +Patch10: %{name}-reduce-notfound-output.patch +Patch11: %{name}-update-docs.patch +Patch12: %{name}-docs-broken-links.patch +Patch13: %{name}-cl_accelerator_intel.patch +Patch14: %{name}-grammar.patch +Patch15: %{name}-clearer-type-errors.patch +Patch16: %{name}-debian-885423.patch +Patch17: %{name}-disable-wayland-warning.patch +Patch18: %{name}-eventchain-memory-leak.patch +Patch19: %{name}-llvm6-support.patch +Patch20: %{name}-llvm7-support.patch +Patch21: %{name}-accept-old-create-queue.patch +Patch22: %{name}-reduce-notfound-output2.patch +Patch23: %{name}-coffeelake.patch +Patch24: %{name}-in-order-queue.patch +Patch25: %{name}-accept-ignore--g.patch +Patch26: %{name}-llvm8-support.patch +Patch27: %{name}-llvm9-support.patch +Patch28: %{name}-cometlake.patch +# modified +Patch29: %{name}-llvm10-support.patch URL: https://www.freedesktop.org/wiki/Software/Beignet/ BuildRequires: EGL-devel BuildRequires: Mesa-libgbm-devel @@ -56,6 +78,26 @@ poleceń, jądra i programów oraz uruchamia je na GPU. %setup -qn Beignet-%{version}-Source %patch0 -p1 %patch1 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 # don't lower default -std= on g++ 5+ (recent llvm requires C++14) %if "%{_ver_ge '%{cxx_version}' '5.0'}" == "1" -- 2.44.0