0
0
mirror of https://gitlab.com/libeigen/eigen.git synced 2026-01-18 17:31:19 +01:00

[SYCL-2020 Support] Enabling Intel DPCPP Compiler support to Eigen

This commit is contained in:
Mehdi Goli
2023-01-16 07:04:08 +00:00
committed by Antonio Sánchez
parent bae119bb7e
commit b523120687
32 changed files with 305 additions and 332 deletions

View File

@@ -16,5 +16,11 @@ following commands:
1. export COMPUTECPP_PACKAGE_ROOT_DIR={PATH TO COMPUTECPP ROOT DIRECTORY}
2. bash eigen_sycl_bench.sh
To compile the floating point GPU benchmarks using Intel DPCPP compiler
/path/to/dpcpp/bin/clang++ -DSYCL_COMPILER_IS_DPCPP -DNDEBUG -DEIGEN_MPL2_ONLY -DEIGEN_USE_SYCL=1 -I ../../ -O3 -fsycl -fsycl-targets="supported backend in DPCPP. i.e. spir64 or nvptx64-nvidia-cuda" -std=c++17 tensor_benchmarks_sycl.cc benchmark_main.cc -lpthread -o eigen_dpcpp_sycl
Last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call
g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
To compile the contraction with DPCPP:
/path/to/dpcpp/bin/clang++ -DSYCL_COMPILER_IS_DPCPP -DNDEBUG -DEIGEN_MPL2_ONLY -DEIGEN_USE_SYCL=1 -I ../../ -O3 -fsycl -fsycl-targets="supported backend in DPCPP. i.e. spir64 or nvptx64-nvidia-cuda" -std=c++17 tensor_contract_sycl_bench.cc -lpthread -o eigen_dpcpp_contract

View File

@@ -15,7 +15,7 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#include <SYCL/sycl.hpp>
#include <CL/sycl.hpp>
#include <fstream>
#include <iostream>
#include <chrono>
@@ -56,9 +56,9 @@ void contraction(const Device& device_, TensorIndex num_iters, TensorIndex m_, T
// Initialize the content of the memory pools to prevent asan from
// complaining.
device_.fill(a_, m_ * k_, T(12));
device_.fill(b_, k_ * n_, T(23));
device_.fill(c_, m_ * n_, T(31));
device_.fill(a_, a_ + (m_ * k_), T(12));
device_.fill(b_, b_ + (k_ * n_), T(23));
device_.fill(c_, c_ + (m_ * n_), T(31));
Eigen::array<TensorIndex, 2> sizeA;
sizeA[0] = m_;
@@ -110,9 +110,9 @@ void contractionRowMajor(const Device& device_, TensorIndex num_iters, TensorInd
// Initialize the content of the memory pools to prevent asan from
// complaining.
device_.memset(a_, 12, m_ * k_ * sizeof(T));
device_.memset(b_, 23, k_ * n_ * sizeof(T));
device_.memset(c_, 31, m_ * n_ * sizeof(T));
device_.memset(a_, T(12), T(m_ * k_ * sizeof(T)));
device_.memset(b_, T(23), T(k_ * n_ * sizeof(T)));
device_.memset(c_, T(31), T(m_ * n_ * sizeof(T)));
Eigen::array<TensorIndex, 2> sizeA;
sizeA[0] = m_;