0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

[SYCL-2020 Support] Enabling Intel DPCPP Compiler support to Eigen

This commit is contained in:
Mehdi Goli
2023-01-16 07:04:08 +00:00
committed by Antonio Sánchez
parent bae119bb7e
commit b523120687
32 changed files with 305 additions and 332 deletions

View File

@@ -16,5 +16,11 @@ following commands:
1. export COMPUTECPP_PACKAGE_ROOT_DIR={PATH TO COMPUTECPP ROOT DIRECTORY}
2. bash eigen_sycl_bench.sh
To compile the floating point GPU benchmarks using Intel DPCPP compiler
/path/to/dpcpp/bin/clang++ -DSYCL_COMPILER_IS_DPCPP -DNDEBUG -DEIGEN_MPL2_ONLY -DEIGEN_USE_SYCL=1 -I ../../ -O3 -fsycl -fsycl-targets="supported backend in DPCPP. i.e. spir64 or nvptx64-nvidia-cuda" -std=c++17 tensor_benchmarks_sycl.cc benchmark_main.cc -lpthread -o eigen_dpcpp_sycl
Last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
To compile the contraction with DPCPP:
/path/to/dpcpp/bin/clang++ -DSYCL_COMPILER_IS_DPCPP -DNDEBUG -DEIGEN_MPL2_ONLY -DEIGEN_USE_SYCL=1 -I ../../ -O3 -fsycl -fsycl-targets="supported backend in DPCPP. i.e. spir64 or nvptx64-nvidia-cuda" -std=c++17 tensor_contract_sycl_bench.cc -lpthread -o eigen_dpcpp_contract

View File

@@ -15,7 +15,7 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#include <SYCL/sycl.hpp>
#include <CL/sycl.hpp>
#include <fstream>
#include <iostream>
#include <chrono>
@@ -56,9 +56,9 @@ void contraction(const Device& device_, TensorIndex num_iters, TensorIndex m_, T
// Initialize the content of the memory pools to prevent asan from
// complaining.
device_.fill(a_, m_ * k_, T(12));
device_.fill(b_, k_ * n_, T(23));
device_.fill(c_, m_ * n_, T(31));
device_.fill(a_, a_ + (m_ * k_), T(12));
device_.fill(b_, b_ + (k_ * n_), T(23));
device_.fill(c_, c_ + (m_ * n_), T(31));
Eigen::array<TensorIndex, 2> sizeA;
sizeA[0] = m_;
@@ -110,9 +110,9 @@ void contractionRowMajor(const Device& device_, TensorIndex num_iters, TensorInd
// Initialize the content of the memory pools to prevent asan from
// complaining.
device_.memset(a_, 12, m_ * k_ * sizeof(T));
device_.memset(b_, 23, k_ * n_ * sizeof(T));
device_.memset(c_, 31, m_ * n_ * sizeof(T));
device_.memset(a_, T(12), T(m_ * k_ * sizeof(T)));
device_.memset(b_, T(23), T(k_ * n_ * sizeof(T)));
device_.memset(c_, T(31), T(m_ * n_ * sizeof(T)));
Eigen::array<TensorIndex, 2> sizeA;
sizeA[0] = m_;