diff --git a/docs/domains/sparse_linear_algebra.rst b/docs/domains/sparse_linear_algebra.rst new file mode 100644 index 000000000..eab5afd56 --- /dev/null +++ b/docs/domains/sparse_linear_algebra.rst @@ -0,0 +1,40 @@ +.. _onemkl_sparse_linear_algebra: + +Sparse Linear Algebra +===================== + +See the latest specification for the sparse domain `here +`_. + +This page documents implementation specific or backend specific details of the +sparse domain. + +OneMKL Intel CPU and GPU backends +--------------------------------- + +Currently known limitations: + +- All operations' algorithms except ``no_optimize_alg`` map to the default + algorithm. +- The required external workspace size is always 0 bytes. +- ``oneapi::mkl::sparse::set_csr_data`` and + ``oneapi::mkl::sparse::set_coo_data`` functions cannot be used on a handle + that has already been used for an operation or its optimize function. Doing so + will throw an ``oneapi::mkl::unimplemented`` exception. +- Using ``spsv`` with the ``oneapi::mkl::sparse::spsv_alg::no_optimize_alg`` and + a sparse matrix that does not have the + ``oneapi::mkl::sparse::matrix_property::sorted`` property will throw an + ``oneapi::mkl::unimplemented`` exception. +- Using ``spmm`` on Intel GPU with a sparse matrix that is + ``oneapi::mkl::transpose::conjtrans`` and has the + ``oneapi::mkl::sparse::matrix_property::symmetric`` property will throw an + ``oneapi::mkl::unimplemented`` exception. +- Using ``spmv`` with a sparse matrix that is + ``oneapi::mkl::transpose::conjtrans`` with a ``type_view`` + ``matrix_descr::symmetric`` or ``matrix_descr::hermitian`` will throw an + ``oneapi::mkl::unimplemented`` exception. +- Using ``spsv`` on Intel GPU with a sparse matrix that is + ``oneapi::mkl::transpose::conjtrans`` will throw an + ``oneapi::mkl::unimplemented`` exception. +- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent + synchronizations and copies to the host. 
diff --git a/docs/index.rst b/docs/index.rst index 51e4216ee..e17eeff6c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -33,4 +33,5 @@ Contents onemkl-datatypes.rst domains/dense_linear_algebra.rst + domains/sparse_linear_algebra.rst create_new_backend.rst diff --git a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt index cb95333b4..5dbbba8a4 100644 --- a/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt +++ b/examples/sparse_blas/compile_time_dispatching/CMakeLists.txt @@ -27,8 +27,8 @@ endif() include(WarningsUtils) foreach(backend ${SPARSE_BLAS_BACKENDS}) - set(EXAMPLE_NAME example_sparse_blas_gemv_usm_${backend}) - add_executable(${EXAMPLE_NAME} sparse_blas_gemv_usm_${backend}.cpp) + set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend}) + add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp) target_include_directories(${EXAMPLE_NAME} PUBLIC ${PROJECT_SOURCE_DIR}/examples/include PUBLIC ${PROJECT_SOURCE_DIR}/include @@ -39,6 +39,6 @@ foreach(backend ${SPARSE_BLAS_BACKENDS}) target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend}) # Register example as ctest - add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_gemv_usm_${backend} COMMAND ${EXAMPLE_NAME}) + add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME}) endforeach(backend) diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp similarity index 73% rename from examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp rename to examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp index edb6d7e1f..4ab078601 100644 --- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_gemv_usm_mklcpu.cpp +++ 
b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp @@ -20,7 +20,7 @@ /* * * Content: -* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv * using unified shared memory to perform general sparse matrix-vector * multiplication on a INTEL CPU SYCL device. * @@ -32,7 +32,7 @@ * * * This example demonstrates only single precision (float) data type for -* gemv matrix data +* spmv matrix data * * *******************************************************************************/ @@ -77,7 +77,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { } catch (sycl::exception const &e) { std::cout << "Caught asynchronous SYCL " - "exception during sparse::gemv:\n" + "exception during sparse::spmv:\n" << e.what() << std::endl; } } @@ -128,7 +128,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - std::cout << "\n\t\tsparse::gemv parameters:\n"; + oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view A_view; + + std::cout << "\n\t\tsparse::spmv parameters:\n"; std::cout << "\t\t\ttransA = " << (transA == oneapi::mkl::transpose::nontrans ? 
"nontrans" @@ -137,23 +140,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { std::cout << "\t\t\tnrows = " << nrows << std::endl; std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; - // create and initialize handle for a Sparse Matrix in CSR format - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - - oneapi::mkl::sparse::init_matrix_handle(cpu_selector, &handle); - - auto ev_set = oneapi::mkl::sparse::set_csr_data(cpu_selector, handle, nrows, nrows, nnz, - oneapi::mkl::index_base::zero, ia, ja, a); - - auto ev_opt = oneapi::mkl::sparse::optimize_gemv(cpu_selector, transA, handle, { ev_set }); - - auto ev_gemv = - oneapi::mkl::sparse::gemv(cpu_selector, transA, alpha, handle, x, beta, y, { ev_opt }); - - auto ev_release = - oneapi::mkl::sparse::release_matrix_handle(cpu_selector, &handle, { ev_gemv }); - - ev_release.wait_and_throw(); + // Create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::init_csr_matrix(cpu_selector, &A_handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + // Create and initialize dense vector handles + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::init_dense_vector(cpu_selector, &x_handle, sizevec, x); + oneapi::mkl::sparse::init_dense_vector(cpu_selector, &y_handle, sizevec, y); + + // Create operation descriptor + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + oneapi::mkl::sparse::init_spmv_descr(cpu_selector, &descr); + + // Allocate external workspace + std::size_t workspace_size = 0; + oneapi::mkl::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace_size); + void *workspace = sycl::malloc_device(workspace_size, cpu_queue); + + // Optimize spmv + auto ev_opt = + 
oneapi::mkl::sparse::spmv_optimize(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace); + + // Run spmv + auto ev_spmv = oneapi::mkl::sparse::spmv(cpu_selector, transA, &alpha, A_view, A_handle, + x_handle, &beta, y_handle, alg, descr, { ev_opt }); + + // Release handles and descriptor + std::vector release_events; + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(cpu_selector, x_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(cpu_selector, y_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_sparse_matrix(cpu_selector, A_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_spmv_descr(cpu_selector, descr, { ev_spmv })); + for (auto event : release_events) { + event.wait_and_throw(); + } // // Post Processing @@ -181,7 +210,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { good &= check_result(res[row], z[row], nrows, row); } - std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished" << std::endl; free_vec(fp_ptr_vec, cpu_queue); @@ -211,7 +240,7 @@ void print_example_banner() { std::cout << "# and alpha, beta are floating point type precision scalars." << std::endl; std::cout << "# " << std::endl; std::cout << "# Using apis:" << std::endl; - std::cout << "# sparse::gemv" << std::endl; + std::cout << "# sparse::spmv" << std::endl; std::cout << "# " << std::endl; std::cout << "# Using single precision (float) data type" << std::endl; std::cout << "# " << std::endl; @@ -232,22 +261,22 @@ int main(int /*argc*/, char ** /*argv*/) { // TODO: Add cuSPARSE compile-time dispatcher in this example once it is supported. sycl::device cpu_dev(sycl::cpu_selector_v); - std::cout << "Running Sparse BLAS GEMV USM example on CPU device." 
<< std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl; std::cout << "Device name is: " << cpu_dev.get_info() << std::endl; std::cout << "Running with single precision real data type:" << std::endl; run_sparse_matrix_vector_multiply_example(cpu_dev); - std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl; } catch (sycl::exception const &e) { - std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } catch (std::exception const &e) { - std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; } diff --git a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt index 6f144c898..398f3e0f2 100644 --- a/examples/sparse_blas/run_time_dispatching/CMakeLists.txt +++ b/examples/sparse_blas/run_time_dispatching/CMakeLists.txt @@ -22,7 +22,7 @@ include(WarningsUtils) # Build object from all example sources -set(SPARSE_BLAS_RT_SOURCES "sparse_blas_gemv_usm") +set(SPARSE_BLAS_RT_SOURCES "sparse_blas_spmv_usm") # Set up for the right backend for run-time dispatching examples # If users build more than one backend (i.e. 
mklcpu and mklgpu, or mklcpu and CUDA), they may need to # overwrite ONEAPI_DEVICE_SELECTOR in their environment to run on the desired backend diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp similarity index 73% rename from examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp rename to examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index b5812fabf..d87297600 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_gemv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -20,7 +20,7 @@ /* * * Content: -* This example demonstrates use of DPCPP API oneapi::mkl::sparse::gemv +* This example demonstrates use of DPCPP API oneapi::mkl::sparse::spmv * using unified shared memory to perform general sparse matrix-vector * multiplication on a SYCL device (HOST, CPU, GPU) that is selected * during runtime. @@ -33,7 +33,7 @@ * * * This example demonstrates only single precision (float) data type for -* gemv matrix data +* spmv matrix data * * *******************************************************************************/ @@ -78,7 +78,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { } catch (sycl::exception const &e) { std::cout << "Caught asynchronous SYCL " - "exception during sparse::gemv:\n" + "exception during sparse::spmv:\n" << e.what() << std::endl; } } @@ -93,6 +93,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::size_t sizeja = static_cast(27 * nrows); std::size_t sizeia = static_cast(nrows + 1); std::size_t sizevec = static_cast(nrows); + auto sizevec_i64 = static_cast(sizevec); ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); @@ -128,7 +129,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // 
oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans; - std::cout << "\n\t\tsparse::gemv parameters:\n"; + oneapi::mkl::sparse::spmv_alg alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view A_view; + + std::cout << "\n\t\tsparse::spmv parameters:\n"; std::cout << "\t\t\ttransA = " << (transA == oneapi::mkl::transpose::nontrans ? "nontrans" @@ -137,22 +141,49 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::cout << "\t\t\tnrows = " << nrows << std::endl; std::cout << "\t\t\talpha = " << alpha << ", beta = " << beta << std::endl; - // create and initialize handle for a Sparse Matrix in CSR format - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - - oneapi::mkl::sparse::init_matrix_handle(main_queue, &handle); - - auto ev_set = oneapi::mkl::sparse::set_csr_data(main_queue, handle, nrows, nrows, nnz, - oneapi::mkl::index_base::zero, ia, ja, a); - - auto ev_opt = oneapi::mkl::sparse::optimize_gemv(main_queue, transA, handle, { ev_set }); - - auto ev_gemv = - oneapi::mkl::sparse::gemv(main_queue, transA, alpha, handle, x, beta, y, { ev_opt }); - - auto ev_release = oneapi::mkl::sparse::release_matrix_handle(main_queue, &handle, { ev_gemv }); - - ev_release.wait_and_throw(); + // Create and initialize handle for a Sparse Matrix in CSR format + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz, + oneapi::mkl::index_base::zero, ia, ja, a); + + // Create and initialize dense vector handles + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::init_dense_vector(main_queue, &x_handle, sizevec_i64, x); + oneapi::mkl::sparse::init_dense_vector(main_queue, &y_handle, sizevec_i64, y); + + // Create operation descriptor + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + 
oneapi::mkl::sparse::init_spmv_descr(main_queue, &descr); + + // Allocate external workspace + std::size_t workspace_size = 0; + oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace_size); + void *workspace = sycl::malloc_device(workspace_size, main_queue); + + // Optimize spmv + auto ev_opt = + oneapi::mkl::sparse::spmv_optimize(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, workspace); + + // Run spmv + auto ev_spmv = oneapi::mkl::sparse::spmv(main_queue, transA, &alpha, A_view, A_handle, x_handle, + &beta, y_handle, alg, descr, { ev_opt }); + + // Release handles and descriptor + std::vector release_events; + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(main_queue, x_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_dense_vector(main_queue, y_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_sparse_matrix(main_queue, A_handle, { ev_spmv })); + release_events.push_back( + oneapi::mkl::sparse::release_spmv_descr(main_queue, descr, { ev_spmv })); + for (auto event : release_events) { + event.wait_and_throw(); + } // // Post Processing @@ -180,7 +211,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { good &= check_result(res[row], z[row], nrows, row); } - std::cout << "\n\t\t sparse::gemv example " << (good ? "passed" : "failed") << "\n\tFinished" + std::cout << "\n\t\t sparse::spmv example " << (good ? "passed" : "failed") << "\n\tFinished" << std::endl; free_vec(fp_ptr_vec, main_queue); @@ -210,7 +241,7 @@ void print_example_banner() { std::cout << "# and alpha, beta are floating point type precision scalars." 
<< std::endl; std::cout << "# " << std::endl; std::cout << "# Using apis:" << std::endl; - std::cout << "# sparse::gemv" << std::endl; + std::cout << "# sparse::spmv" << std::endl; std::cout << "# " << std::endl; std::cout << "# Using single precision (float) data type" << std::endl; std::cout << "# " << std::endl; @@ -234,28 +265,28 @@ int main(int /*argc*/, char ** /*argv*/) { sycl::device dev = sycl::device(); if (dev.is_gpu()) { - std::cout << "Running Sparse BLAS GEMV USM example on GPU device." << std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on GPU device." << std::endl; std::cout << "Device name is: " << dev.get_info() << std::endl; } else { - std::cout << "Running Sparse BLAS GEMV USM example on CPU device." << std::endl; + std::cout << "Running Sparse BLAS SPMV USM example on CPU device." << std::endl; std::cout << "Device name is: " << dev.get_info() << std::endl; } std::cout << "Running with single precision real data type:" << std::endl; run_sparse_matrix_vector_multiply_example(dev); - std::cout << "Sparse BLAS GEMV USM example ran OK." << std::endl; + std::cout << "Sparse BLAS SPMV USM example ran OK." 
<< std::endl; } catch (sycl::exception const &e) { - std::cerr << "Caught synchronous SYCL exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } catch (std::exception const &e) { - std::cerr << "Caught std::exception during Sparse GEMV:" << std::endl; + std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; } diff --git a/include/oneapi/mkl/sparse_blas/detail/handles.hpp b/include/oneapi/mkl/sparse_blas/detail/handles.hpp new file mode 100644 index 000000000..0566f93b4 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/handles.hpp @@ -0,0 +1,38 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ + +namespace oneapi::mkl::sparse { + +// Each backend can create its own handle type or re-use the native handle types that will be reinterpret_cast'ed to the types below + +struct dense_matrix_handle; +using dense_matrix_handle_t = dense_matrix_handle*; + +struct dense_vector_handle; +using dense_vector_handle_t = dense_vector_handle*; + +struct matrix_handle; +using matrix_handle_t = matrix_handle*; + +} // namespace oneapi::mkl::sparse + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_HANDLES_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp index 4964b1eff..ace216f00 100644 --- a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp @@ -29,20 +29,18 @@ namespace mkl { namespace sparse { namespace detail { -struct matrix_handle; - -template +template inline constexpr bool is_fp_supported_v = - std::is_same_v || std::is_same_v || - std::is_same_v> || std::is_same_v>; + std::is_same_v || std::is_same_v || + std::is_same_v> || std::is_same_v>; -template +template inline constexpr bool is_int_supported_v = - std::is_same_v || std::is_same_v; + std::is_same_v || std::is_same_v; -template +template inline constexpr bool are_fp_int_supported_v = - is_fp_supported_v&& is_int_supported_v; + is_fp_supported_v&& is_int_supported_v; } // namespace detail } // namespace sparse diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp index 2535e61f6..8686d35bc 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp @@ -22,6 +22,7 @@ #include 
"oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" +#include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi::mkl::sparse::mklcpu { diff --git a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp index bc0089c57..ee127c3f8 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklcpu/sparse_blas_ct.hpp @@ -20,7 +20,6 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_MKLCPU_SPARSE_BLAS_CT_HPP_ -#include "oneapi/mkl/sparse_blas/types.hpp" #include "oneapi/mkl/detail/backends.hpp" #include "oneapi/mkl/detail/backend_selector.hpp" diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp b/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp index 1ca336b9b..eb3aaa5ff 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp @@ -22,6 +22,7 @@ #include "oneapi/mkl/detail/export.hpp" #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" +#include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi::mkl::sparse::mklgpu { diff --git a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp b/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp index 00c01346f..d3b0d365f 100644 --- a/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/mklgpu/sparse_blas_ct.hpp @@ -20,7 +20,6 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_MKLGPU_SPARSE_BLAS_CT_HPP_ -#include "oneapi/mkl/sparse_blas/types.hpp" #include "oneapi/mkl/detail/backends.hpp" #include "oneapi/mkl/detail/backend_selector.hpp" diff --git 
a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx index 03beaa4b4..4b701eb6f 100644 --- a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx @@ -20,72 +20,198 @@ // This file is meant to be included in each backend onemkl_sparse_blas_BACKEND.hpp files. // It is used to exports each symbol to the onemkl_sparse_blas_BACKEND library. -ONEMKL_EXPORT void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); +// Dense vector +template +ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, + std::int64_t size, sycl::buffer val); +template +ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, + std::int64_t size, dataType *val); -ONEMKL_EXPORT sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, +template +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, + std::int64_t size, sycl::buffer val); +template +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, + std::int64_t size, dataType *val); + +ONEMKL_EXPORT sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}); + +// Dense matrix +template +ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, sycl::buffer val); +template +ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout, dataType *val); + +template +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, 
std::int64_t num_cols, + std::int64_t ld, layout dense_layout, + sycl::buffer val); +template +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t ld, layout dense_layout, dataType *val); + +ONEMKL_EXPORT sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}); + +// COO matrix +template +ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType *row_ind, indexType *col_ind, + dataType *val); + +template +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ind, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val); + +// CSR matrix +template +ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + index_base index, indexType *row_ptr, indexType *col_ind, + dataType *val); + +template +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue 
&queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, + sycl::buffer row_ptr, + sycl::buffer col_ind, + sycl::buffer val); +template +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val); + +// Common sparse matrix functions +ONEMKL_EXPORT sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, const std::vector &dependencies = {}); -template -ONEMKL_EXPORT std::enable_if_t> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, - sycl::buffer &val); +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> -set_csr_data(sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, - intType nnz, index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies = {}); +// SPMM +ONEMKL_EXPORT void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); -ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, - matrix_handle_t handle, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, std::size_t &temp_buffer_size); -ONEMKL_EXPORT sycl::event optimize_gemm(sycl::queue &queue, 
transpose transpose_A, - transpose transpose_B, layout dense_matrix_layout, - const std::int64_t columns, matrix_handle_t handle, +ONEMKL_EXPORT void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, - matrix_handle_t handle, +ONEMKL_EXPORT sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +// SPMV +ONEMKL_EXPORT void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); + +ONEMKL_EXPORT sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + +ONEMKL_EXPORT void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + 
dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, + spmv_alg alg, spmv_descr_t spmv_descr, void *workspace, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t handle, +ONEMKL_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +// SPSV +ONEMKL_EXPORT void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); + +ONEMKL_EXPORT sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); + +ONEMKL_EXPORT void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + +ONEMKL_EXPORT void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace); + +ONEMKL_EXPORT sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + 
spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}); -template -ONEMKL_EXPORT std::enable_if_t> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - sycl::buffer &x, const fpType beta, sycl::buffer &y); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}); - -template -ONEMKL_EXPORT std::enable_if_t> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, sycl::buffer &x, sycl::buffer &y); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}); - -template -ONEMKL_EXPORT std::enable_if_t> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc); - -template -ONEMKL_EXPORT std::enable_if_t, sycl::event> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, - const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); diff --git 
a/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp b/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp new file mode 100644 index 000000000..b79036830 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/detail/operation_types.hpp @@ -0,0 +1,38 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ +#define _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ + +namespace oneapi::mkl::sparse { + +// Each backend can create its own descriptor type or re-use the native descriptor types that will be reinterpret_cast'ed to the types below + +struct spmm_descr; +using spmm_descr_t = spmm_descr*; + +struct spmv_descr; +using spmv_descr_t = spmv_descr*; + +struct spsv_descr; +using spsv_descr_t = spsv_descr*; + +} // namespace oneapi::mkl::sparse + +#endif // _ONEMKL_SPARSE_BLAS_DETAIL_OPERATION_TYPES_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx index 41fe51c49..ca09d09d4 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx @@ -24,112 +24,307 @@ #error "BACKEND is not defined" #endif -inline void init_matrix_handle(backend_selector selector, - matrix_handle_t *p_handle) { - BACKEND::init_matrix_handle(selector.get_queue(), p_handle); +// Dense vector +template +std::enable_if_t> init_dense_vector( + backend_selector selector, dense_vector_handle_t *p_dvhandle, + std::int64_t size, sycl::buffer val) { + BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); +} +template +std::enable_if_t> init_dense_vector( + backend_selector selector, dense_vector_handle_t *p_dvhandle, + std::int64_t size, dataType *val) { + BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); +} + +template +std::enable_if_t> set_dense_vector_data( + backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val) { + BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); +} +template +std::enable_if_t> set_dense_vector_data( + backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, + 
dataType *val) { + BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); +} + +inline sycl::event release_dense_vector(backend_selector selector, + dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}) { + return BACKEND::release_dense_vector(selector.get_queue(), dvhandle, dependencies); +} + +// Dense matrix +template +std::enable_if_t> init_dense_matrix( + backend_selector selector, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + sycl::buffer val) { + BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} +template +std::enable_if_t> init_dense_matrix( + backend_selector selector, dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + dataType *val) { + BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} + +template +std::enable_if_t> set_dense_matrix_data( + backend_selector selector, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + sycl::buffer val) { + BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} +template +std::enable_if_t> set_dense_matrix_data( + backend_selector selector, dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, + dataType *val) { + BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, num_cols, ld, + dense_layout, val); +} + +inline sycl::event release_dense_matrix(backend_selector selector, + dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}) { + return BACKEND::release_dense_matrix(selector.get_queue(), dmhandle, dependencies); } -inline sycl::event release_matrix_handle(backend_selector selector, - 
matrix_handle_t *p_handle, +// COO matrix +template +std::enable_if_t> init_coo_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} +template +std::enable_if_t> init_coo_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val) { + BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} + +template +std::enable_if_t> set_coo_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} +template +std::enable_if_t> set_coo_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, + indexType *col_ind, dataType *val) { + BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ind, col_ind, val); +} + +// CSR matrix +template +std::enable_if_t> init_csr_matrix( + backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} +template +std::enable_if_t> init_csr_matrix( + backend_selector 
selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val) { + BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} + +template +std::enable_if_t> set_csr_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} +template +std::enable_if_t> set_csr_matrix_data( + backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, + indexType *col_ind, dataType *val) { + BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, + row_ptr, col_ind, val); +} + +// Common sparse matrix functions +inline sycl::event release_sparse_matrix(backend_selector selector, + matrix_handle_t smhandle, const std::vector &dependencies = {}) { - return BACKEND::release_matrix_handle(selector.get_queue(), p_handle, dependencies); + return BACKEND::release_sparse_matrix(selector.get_queue(), smhandle, dependencies); } -template -std::enable_if_t> set_csr_data( - backend_selector selector, matrix_handle_t handle, intType num_rows, - intType num_cols, intType nnz, index_base index, sycl::buffer &row_ptr, - sycl::buffer &col_ind, sycl::buffer &val) { - BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, row_ptr, - col_ind, val); +inline bool set_matrix_property(backend_selector selector, + matrix_handle_t smhandle, matrix_property property) { + return BACKEND::set_matrix_property(selector.get_queue(), smhandle, property); } -template -std::enable_if_t, sycl::event> 
set_csr_data( - backend_selector selector, matrix_handle_t handle, intType num_rows, - intType num_cols, intType nnz, index_base index, intType *row_ptr, intType *col_ind, - fpType *val, const std::vector &dependencies = {}) { - return BACKEND::set_csr_data(selector.get_queue(), handle, num_rows, num_cols, nnz, index, - row_ptr, col_ind, val, dependencies); +// SPMM +inline void init_spmm_descr(backend_selector selector, + spmm_descr_t *p_spmm_descr) { + BACKEND::init_spmm_descr(selector.get_queue(), p_spmm_descr); } -inline sycl::event optimize_gemm(backend_selector selector, transpose transpose_A, - matrix_handle_t handle, - const std::vector &dependencies = {}) { - return BACKEND::optimize_gemm(selector.get_queue(), transpose_A, handle, dependencies); +inline sycl::event release_spmm_descr(backend_selector selector, + spmm_descr_t spmm_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spmm_descr(selector.get_queue(), spmm_descr, dependencies); } -inline sycl::event optimize_gemm(backend_selector selector, transpose transpose_A, - transpose transpose_B, layout dense_matrix_layout, - const std::int64_t columns, matrix_handle_t handle, +inline void spmm_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size) { + BACKEND::spmm_buffer_size(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, temp_buffer_size); +} + +inline void spmm_optimize(backend_selector selector, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, sycl::buffer 
workspace) { + BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, workspace); +} + +inline sycl::event spmm_optimize(backend_selector selector, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_gemm(selector.get_queue(), transpose_A, transpose_B, - dense_matrix_layout, columns, handle, dependencies); + return BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, workspace, dependencies); +} + +inline sycl::event spmm(backend_selector selector, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}) { + return BACKEND::spmm(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, dependencies); +} + +// SPMV +inline void init_spmv_descr(backend_selector selector, + spmv_descr_t *p_spmv_descr) { + BACKEND::init_spmv_descr(selector.get_queue(), p_spmv_descr); +} + +inline sycl::event release_spmv_descr(backend_selector selector, + spmv_descr_t spmv_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spmv_descr(selector.get_queue(), spmv_descr, dependencies); +} + +inline void spmv_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t 
&temp_buffer_size) { + BACKEND::spmv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, temp_buffer_size); +} + +inline void spmv_optimize(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + sycl::buffer workspace) { + BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, workspace); } -inline sycl::event optimize_gemv(backend_selector selector, - transpose transpose_val, matrix_handle_t handle, +inline sycl::event spmv_optimize(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_gemv(selector.get_queue(), transpose_val, handle, dependencies); + return BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace, dependencies); +} + +inline sycl::event spmv(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}) { + return BACKEND::spmv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, dependencies); +} + +// SPSV +inline void init_spsv_descr(backend_selector selector, + spsv_descr_t *p_spsv_descr) { + BACKEND::init_spsv_descr(selector.get_queue(), p_spsv_descr); +} + +inline sycl::event release_spsv_descr(backend_selector selector, + 
spsv_descr_t spsv_descr, + const std::vector &dependencies = {}) { + return BACKEND::release_spsv_descr(selector.get_queue(), spsv_descr, dependencies); } -inline sycl::event optimize_trsv(backend_selector selector, uplo uplo_val, - transpose transpose_val, diag diag_val, matrix_handle_t handle, +inline void spsv_buffer_size(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size) { + BACKEND::spsv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, temp_buffer_size); +} + +inline void spsv_optimize(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace) { + BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, workspace); +} + +inline sycl::event spsv_optimize(backend_selector selector, + oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, + spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}) { - return BACKEND::optimize_trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, handle, - dependencies); -} - -template -std::enable_if_t> gemv( - backend_selector selector, transpose transpose_val, const fpType alpha, - matrix_handle_t A_handle, sycl::buffer &x, const fpType beta, - sycl::buffer &y) { - BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y); -} - -template -std::enable_if_t, sycl::event> gemv( - backend_selector selector, transpose transpose_val, const fpType alpha, - 
matrix_handle_t A_handle, const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}) { - return BACKEND::gemv(selector.get_queue(), transpose_val, alpha, A_handle, x, beta, y, - dependencies); -} - -template -std::enable_if_t> trsv( - backend_selector selector, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t A_handle, sycl::buffer &x, - sycl::buffer &y) { - BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y); -} - -template -std::enable_if_t, sycl::event> trsv( - backend_selector selector, uplo uplo_val, transpose transpose_val, - diag diag_val, matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}) { - return BACKEND::trsv(selector.get_queue(), uplo_val, transpose_val, diag_val, A_handle, x, y, - dependencies); -} - -template -std::enable_if_t> gemm( - backend_selector selector, layout dense_matrix_layout, transpose transpose_A, - transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc) { - BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, - A_handle, B, columns, ldb, beta, C, ldc); -} - -template -std::enable_if_t, sycl::event> gemm( - backend_selector selector, layout dense_matrix_layout, transpose transpose_A, - transpose transpose_B, const fpType alpha, matrix_handle_t A_handle, const fpType *B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType *C, - const std::int64_t ldc, const std::vector &dependencies = {}) { - return BACKEND::gemm(selector.get_queue(), dense_matrix_layout, transpose_A, transpose_B, alpha, - A_handle, B, columns, ldb, beta, C, ldc, dependencies); + return BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace, dependencies); +} + 
+inline sycl::event spsv(backend_selector selector, oneapi::mkl::transpose opA, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}) { + return BACKEND::spsv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, dependencies); } diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp index 131e0545a..86a00f507 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp @@ -20,81 +20,186 @@ #ifndef _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ #define _ONEMKL_SPARSE_BLAS_DETAIL_SPARSE_BLAS_RT_HPP_ +#include "oneapi/mkl/sparse_blas/detail/helper_types.hpp" #include "oneapi/mkl/sparse_blas/types.hpp" namespace oneapi { namespace mkl { namespace sparse { -void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle); - -sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, +// Dense vector +template +std::enable_if_t> init_dense_vector( + sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::buffer val); +template +std::enable_if_t> init_dense_vector( + sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, dataType *val); + +template +std::enable_if_t> set_dense_vector_data( + sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val); +template +std::enable_if_t> set_dense_vector_data( + sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType *val); + +sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies = {}); + +// Dense matrix +template +std::enable_if_t> init_dense_matrix( + sycl::queue &queue, dense_matrix_handle_t 
*p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); +template +std::enable_if_t> init_dense_matrix( + sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + +template +std::enable_if_t> set_dense_matrix_data( + sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); +template +std::enable_if_t> set_dense_matrix_data( + sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + +sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies = {}); + +// COO matrix +template +std::enable_if_t> init_coo_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> init_coo_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + +template +std::enable_if_t> set_coo_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> set_coo_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + +// CSR matrix +template +std::enable_if_t> init_csr_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, 
std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> init_csr_matrix( + sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + +template +std::enable_if_t> set_csr_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val); +template +std::enable_if_t> set_csr_matrix_data( + sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, const std::vector &dependencies = {}); -template -std::enable_if_t> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, sycl::buffer &row_ptr, sycl::buffer &col_ind, - sycl::buffer &val); +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); -template -std::enable_if_t, sycl::event> set_csr_data( - sycl::queue &queue, matrix_handle_t handle, intType num_rows, intType num_cols, intType nnz, - index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies = {}); +// SPMM +void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle, - const std::vector &dependencies = {}); +sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, 
transpose transpose_B, - layout dense_matrix_layout, const std::int64_t columns, - matrix_handle_t handle, - const std::vector &dependencies = {}); +void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size); + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace); -sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies = {}); -sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t handle, +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies = {}); + +// SPMV +void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); + +sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies = {}); + +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view 
A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace); + +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + void *workspace, const std::vector &dependencies = {}); + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector &dependencies = {}); + +// SPSV +void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); + +sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace); + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + 
dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, const std::vector &dependencies = {}); -template -std::enable_if_t> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - sycl::buffer &x, const fpType beta, sycl::buffer &y); - -template -std::enable_if_t, sycl::event> gemv( - sycl::queue &queue, transpose transpose_val, const fpType alpha, matrix_handle_t A_handle, - const fpType *x, const fpType beta, fpType *y, - const std::vector &dependencies = {}); - -template -std::enable_if_t> trsv(sycl::queue &queue, uplo uplo_val, - transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, - sycl::buffer &x, - sycl::buffer &y); - -template -std::enable_if_t, sycl::event> trsv( - sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t A_handle, const fpType *x, fpType *y, - const std::vector &dependencies = {}); - -template -std::enable_if_t> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, sycl::buffer &B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer &C, const std::int64_t ldc); - -template -std::enable_if_t, sycl::event> gemm( - sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, matrix_handle_t A_handle, const fpType *B, const std::int64_t columns, - const std::int64_t ldb, const fpType beta, fpType *C, const std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies = {}); } // namespace sparse } // namespace mkl diff --git 
a/include/oneapi/mkl/sparse_blas/matrix_view.hpp b/include/oneapi/mkl/sparse_blas/matrix_view.hpp new file mode 100644 index 000000000..08762c5d7 --- /dev/null +++ b/include/oneapi/mkl/sparse_blas/matrix_view.hpp @@ -0,0 +1,51 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ +#define _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ + +#include "oneapi/mkl/types.hpp" + +namespace oneapi { +namespace mkl { +namespace sparse { + +enum class matrix_descr { + general, + symmetric, + hermitian, + triangular, + diagonal, +}; + +struct matrix_view { + matrix_descr type_view = matrix_descr::general; + uplo uplo_view = uplo::lower; + diag diag_view = diag::nonunit; + + matrix_view() = default; + + matrix_view(matrix_descr type_view) : type_view(type_view) {} +}; + +} // namespace sparse +} // namespace mkl +} // namespace oneapi + +#endif // _ONEMKL_SPARSE_BLAS_MATRIX_VIEW_HPP_ diff --git a/include/oneapi/mkl/sparse_blas/types.hpp b/include/oneapi/mkl/sparse_blas/types.hpp index 406c7dd1f..d4aea3e88 100644 --- a/include/oneapi/mkl/sparse_blas/types.hpp +++ b/include/oneapi/mkl/sparse_blas/types.hpp @@ -20,22 +20,50 @@ #ifndef _ONEMKL_SPARSE_BLAS_TYPES_HPP_ 
#define _ONEMKL_SPARSE_BLAS_TYPES_HPP_ -#if __has_include() -#include -#else -#include -#endif - -#include - #include "oneapi/mkl/types.hpp" -#include "detail/helper_types.hpp" +#include "matrix_view.hpp" +#include "detail/handles.hpp" +#include "detail/operation_types.hpp" + +/** + * @file Include and define the sparse types that are common between close-source MKL API and oneMKL API. +*/ namespace oneapi { namespace mkl { namespace sparse { -using matrix_handle_t = detail::matrix_handle*; +enum class matrix_property { + symmetric, + sorted, +}; + +enum class spmm_alg { + default_alg, + no_optimize_alg, + coo_alg1, + coo_alg2, + coo_alg3, + coo_alg4, + csr_alg1, + csr_alg2, + csr_alg3, +}; + +enum class spmv_alg { + default_alg, + no_optimize_alg, + coo_alg1, + coo_alg2, + csr_alg1, + csr_alg2, + csr_alg3, +}; + +enum class spsv_alg { + default_alg, + no_optimize_alg, +}; } // namespace sparse } // namespace mkl diff --git a/src/sparse_blas/CMakeLists.txt b/src/sparse_blas/CMakeLists.txt index b93902f49..b01cc63fd 100644 --- a/src/sparse_blas/CMakeLists.txt +++ b/src/sparse_blas/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/sparse_blas/backends/CMakeLists.txt b/src/sparse_blas/backends/CMakeLists.txt index ef606c6e1..294040808 100644 --- a/src/sparse_blas/backends/CMakeLists.txt +++ b/src/sparse_blas/backends/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/src/sparse_blas/backends/backend_wrappers.cxx b/src/sparse_blas/backends/backend_wrappers.cxx index 2c8161249..fff743e82 100644 --- a/src/sparse_blas/backends/backend_wrappers.cxx +++ b/src/sparse_blas/backends/backend_wrappers.cxx @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ /* This file lists functions matching those required by sparse_blas_function_table_t in @@ -35,51 +35,81 @@ Changes to this file should be matched to changes in sparse_blas/function_table. function template instantiations must be added to backend_sparse_blas_instantiations.cxx. */ +#define REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + DEFINE_MACRO() \ + DEFINE_MACRO() \ + DEFINE_MACRO() \ + DEFINE_MACRO() + +#define REPEAT_FOR_EACH_FP_AND_INT_TYPE(DEFINE_MACRO) \ + REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) \ + REPEAT_FOR_EACH_FP_TYPE(DEFINE_MACRO) + // clang-format off -oneapi::mkl::sparse::BACKEND::init_matrix_handle, -oneapi::mkl::sparse::BACKEND::release_matrix_handle, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::set_csr_data, -oneapi::mkl::sparse::BACKEND::optimize_gemm, -oneapi::mkl::sparse::BACKEND::optimize_gemm, -oneapi::mkl::sparse::BACKEND::optimize_gemv, -oneapi::mkl::sparse::BACKEND::optimize_trsv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, 
-oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::gemv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::trsv, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, -oneapi::mkl::sparse::BACKEND::gemm, +// Dense vector +#define LIST_DENSE_VECTOR_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_dense_vector, \ +oneapi::mkl::sparse::BACKEND::init_dense_vector, \ +oneapi::mkl::sparse::BACKEND::set_dense_vector_data, \ +oneapi::mkl::sparse::BACKEND::set_dense_vector_data, +REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_VECTOR_FUNCS) +#undef LIST_DENSE_VECTOR_FUNCS +oneapi::mkl::sparse::BACKEND::release_dense_vector, + +// Dense matrix +#define LIST_DENSE_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_dense_matrix, \ +oneapi::mkl::sparse::BACKEND::init_dense_matrix, \ +oneapi::mkl::sparse::BACKEND::set_dense_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_dense_matrix_data, +REPEAT_FOR_EACH_FP_TYPE(LIST_DENSE_MATRIX_FUNCS) +#undef LIST_DENSE_MATRIX_FUNCS +oneapi::mkl::sparse::BACKEND::release_dense_matrix, + +// COO matrix +#define LIST_COO_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_coo_matrix, \ +oneapi::mkl::sparse::BACKEND::init_coo_matrix, \ +oneapi::mkl::sparse::BACKEND::set_coo_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_coo_matrix_data, +REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_COO_MATRIX_FUNCS) +#undef LIST_COO_MATRIX_FUNCS + +// CSR matrix +#define LIST_CSR_MATRIX_FUNCS() \ +oneapi::mkl::sparse::BACKEND::init_csr_matrix, \ 
+oneapi::mkl::sparse::BACKEND::init_csr_matrix, \ +oneapi::mkl::sparse::BACKEND::set_csr_matrix_data, \ +oneapi::mkl::sparse::BACKEND::set_csr_matrix_data, +REPEAT_FOR_EACH_FP_AND_INT_TYPE(LIST_CSR_MATRIX_FUNCS) +#undef LIST_CSR_MATRIX_FUNCS + +// Common sparse matrix functions +oneapi::mkl::sparse::BACKEND::release_sparse_matrix, +oneapi::mkl::sparse::BACKEND::set_matrix_property, + +// SPMM +oneapi::mkl::sparse::BACKEND::init_spmm_descr, +oneapi::mkl::sparse::BACKEND::release_spmm_descr, +oneapi::mkl::sparse::BACKEND::spmm_buffer_size, +oneapi::mkl::sparse::BACKEND::spmm_optimize, +oneapi::mkl::sparse::BACKEND::spmm_optimize, +oneapi::mkl::sparse::BACKEND::spmm, + +// SPMV +oneapi::mkl::sparse::BACKEND::init_spmv_descr, +oneapi::mkl::sparse::BACKEND::release_spmv_descr, +oneapi::mkl::sparse::BACKEND::spmv_buffer_size, +oneapi::mkl::sparse::BACKEND::spmv_optimize, +oneapi::mkl::sparse::BACKEND::spmv_optimize, +oneapi::mkl::sparse::BACKEND::spmv, + +// SPSV +oneapi::mkl::sparse::BACKEND::init_spsv_descr, +oneapi::mkl::sparse::BACKEND::release_spsv_descr, +oneapi::mkl::sparse::BACKEND::spsv_buffer_size, +oneapi::mkl::sparse::BACKEND::spsv_optimize, +oneapi::mkl::sparse::BACKEND::spsv_optimize, +oneapi::mkl::sparse::BACKEND::spsv, + // clang-format on diff --git a/src/sparse_blas/backends/mkl_common/mkl_basic.cxx b/src/sparse_blas/backends/mkl_common/mkl_basic.cxx deleted file mode 100644 index fd3b1563a..000000000 --- a/src/sparse_blas/backends/mkl_common/mkl_basic.cxx +++ /dev/null @@ -1,62 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -void init_matrix_handle(sycl::queue & /*queue*/, detail::matrix_handle **p_handle) { - oneapi::mkl::sparse::init_matrix_handle(detail::get_handle(p_handle)); -} - -sycl::event release_matrix_handle(sycl::queue &queue, detail::matrix_handle **p_handle, - const std::vector &dependencies) { - return oneapi::mkl::sparse::release_matrix_handle(queue, detail::get_handle(p_handle), - dependencies); -} - -template -std::enable_if_t> set_csr_data( - sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, - intType /*nnz*/, index_base index, sycl::buffer &row_ptr, - sycl::buffer &col_ind, sycl::buffer &val) { - oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, index, - row_ptr, col_ind, val); -} - -template -std::enable_if_t, sycl::event> set_csr_data( - sycl::queue &queue, detail::matrix_handle *handle, intType num_rows, intType num_cols, - intType /*nnz*/, index_base index, intType *row_ptr, intType *col_ind, fpType *val, - const std::vector &dependencies) { - return oneapi::mkl::sparse::set_csr_data(queue, detail::get_handle(handle), num_rows, num_cols, - index, row_ptr, col_ind, val, dependencies); -} - -#define INSTANTIATE_SET_CSR_DATA(FP_TYPE, INT_TYPE) \ - template std::enable_if_t> \ - set_csr_data( \ - sycl::queue & queue, detail::matrix_handle * handle, INT_TYPE num_rows, INT_TYPE num_cols, \ - INT_TYPE nnz, index_base index, sycl::buffer & row_ptr, \ - 
sycl::buffer & col_ind, sycl::buffer & val); \ - template std::enable_if_t, sycl::event> \ - set_csr_data(sycl::queue & queue, detail::matrix_handle * handle, \ - INT_TYPE num_rows, INT_TYPE num_cols, INT_TYPE nnz, \ - index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, \ - FP_TYPE * val, const std::vector &dependencies) - -FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_SET_CSR_DATA); - -#undef INSTANTIATE_SET_CSR_DATA diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx new file mode 100644 index 000000000..3ae84ca64 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -0,0 +1,393 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +// Dense vector +template +void init_dense_vector(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::buffer val) { + *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); +} + +template +void init_dense_vector(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, + fpType *val) { + *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); +} + +template +void check_can_reset_value_handle(const std::string &function_name, + InternalHandleT *internal_handle, bool expect_buffer) { + if (internal_handle->get_value_type() != detail::get_data_type()) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + + data_type_to_str(internal_handle->get_value_type()) + " but got " + + data_type_to_str(detail::get_data_type())); + } + if (internal_handle->all_use_buffer() != expect_buffer) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, "Cannot change the container type between buffer or USM"); + } +} + +template +void set_dense_vector_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, + sycl::buffer val) { + check_can_reset_value_handle(__func__, dvhandle, true); + dvhandle->size = size; + dvhandle->set_buffer(val); +} + +template +void set_dense_vector_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, + fpType *val) { + check_can_reset_value_handle(__func__, dvhandle, false); + dvhandle->size = size; + dvhandle->set_usm_ptr(val); +} + +#define INSTANTIATE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template void 
init_dense_vector( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, FP_TYPE * val); \ + template void set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val); \ + template void set_dense_vector_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE * val) +FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); +#undef INSTANTIATE_DENSE_VECTOR_FUNCS + +sycl::event release_dense_vector(sycl::queue &queue, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, + const std::vector &dependencies) { + return detail::submit_release(queue, dvhandle, dependencies); +} + +// Dense matrix +template +void init_dense_matrix(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, sycl::buffer val) { + *p_dmhandle = + new oneapi::mkl::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); +} + +template +void init_dense_matrix(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, fpType *val) { + *p_dmhandle = + new oneapi::mkl::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); +} + +template +void set_dense_matrix_data(sycl::queue & /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, sycl::buffer val) { + check_can_reset_value_handle(__func__, dmhandle, true); + dmhandle->num_rows = num_rows; + dmhandle->num_cols = num_cols; + dmhandle->ld = ld; + dmhandle->dense_layout = dense_layout; + dmhandle->set_buffer(val); +} + +template +void set_dense_matrix_data(sycl::queue & /*queue*/, 
+ oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + oneapi::mkl::layout dense_layout, fpType *val) { + check_can_reset_value_handle(__func__, dmhandle, false); + dmhandle->num_rows = num_rows; + dmhandle->num_cols = num_cols; + dmhandle->ld = ld; + dmhandle->dense_layout = dense_layout; + dmhandle->set_usm_ptr(val); +} + +#define INSTANTIATE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template void init_dense_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template void init_dense_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE * val); \ + template void set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + template void set_dense_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE * val) +FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); +#undef INSTANTIATE_DENSE_MATRIX_FUNCS + +sycl::event release_dense_matrix(sycl::queue &queue, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + const std::vector &dependencies) { + return detail::submit_release(queue, dmhandle, dependencies); +} + +// COO matrix +template +void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer 
val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. + oneapi::mkl::sparse::set_coo_data(queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + index, internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, + fpType *val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); + auto event = oneapi::mkl::sparse::set_coo_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), + static_cast(nnz), index, row_ind, col_ind, val); + event.wait_and_throw(); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void check_can_reset_sparse_handle(const std::string &function_name, + detail::sparse_matrix_handle *internal_smhandle, + bool expect_buffer) { + check_can_reset_value_handle(function_name, internal_smhandle, expect_buffer); + if (internal_smhandle->get_int_type() != detail::get_data_type()) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + + data_type_to_str(internal_smhandle->get_int_type()) + " but got " + + data_type_to_str(detail::get_data_type())); + } + if (!internal_smhandle->can_be_reset) { + throw mkl::unimplemented( + 
"sparse_blas/mkl", function_name, + "Reseting the matrix handle's data after it was used in a computation is not supported."); + } +} + +template +void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, sycl::buffer row_ind, + sycl::buffer col_ind, sycl::buffer val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset_sparse_handle(__func__, internal_smhandle, true); + internal_smhandle->row_container.set_buffer(row_ind); + internal_smhandle->col_container.set_buffer(col_ind); + internal_smhandle->value_container.set_buffer(val); + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. + oneapi::mkl::sparse::set_coo_data(queue, internal_smhandle->backend_handle, + static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), + index, internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); +} + +template +void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, + oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, + fpType *val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset_sparse_handle(__func__, internal_smhandle, false); + internal_smhandle->row_container.set_usm_ptr(row_ind); + internal_smhandle->col_container.set_usm_ptr(col_ind); + internal_smhandle->value_container.set_usm_ptr(val); + auto event = oneapi::mkl::sparse::set_coo_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), static_cast(nnz), index, row_ind, col_ind, val); + event.wait_and_throw(); +} + +#define INSTANTIATE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) 
\ + template void init_coo_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void init_coo_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ + template void set_coo_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val); \ + template void set_coo_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) +FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS); +#undef INSTANTIATE_COO_MATRIX_FUNCS + +// CSR matrix +template +void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); + // The backend deduces nnz from row_ptr. + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
+ oneapi::mkl::sparse::set_csr_data(queue, mkl_handle, static_cast(num_rows), + static_cast(num_cols), index, + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, + fpType *val) { + oneapi::mkl::sparse::matrix_handle_t mkl_handle; + oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); + auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); + // The backend deduces nnz from row_ptr. + auto event = oneapi::mkl::sparse::set_csr_data( + queue, mkl_handle, static_cast(num_rows), static_cast(num_cols), index, + row_ptr, col_ind, val); + event.wait_and_throw(); + *p_smhandle = reinterpret_cast(internal_smhandle); +} + +template +void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, sycl::buffer row_ptr, + sycl::buffer col_ind, sycl::buffer val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset_sparse_handle(__func__, internal_smhandle, true); + internal_smhandle->row_container.set_buffer(row_ptr); + internal_smhandle->col_container.set_buffer(col_ind); + internal_smhandle->value_container.set_buffer(val); + // The backend deduces nnz from row_ptr. + // The backend handle must use the buffers from the internal handle as they will be kept alive until the handle is released. 
+ oneapi::mkl::sparse::set_csr_data(queue, internal_smhandle->backend_handle, + static_cast(num_rows), + static_cast(num_cols), index, + internal_smhandle->row_container.get_buffer(), + internal_smhandle->col_container.get_buffer(), + internal_smhandle->value_container.get_buffer()); +} + +template +void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, + oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, + fpType *val) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + check_can_reset_sparse_handle(__func__, internal_smhandle, false); + internal_smhandle->row_container.set_usm_ptr(row_ptr); + internal_smhandle->col_container.set_usm_ptr(col_ind); + internal_smhandle->value_container.set_usm_ptr(val); + // The backend deduces nnz from row_ptr. + auto event = oneapi::mkl::sparse::set_csr_data( + queue, internal_smhandle->backend_handle, static_cast(num_rows), + static_cast(num_cols), index, row_ptr, col_ind, val); + event.wait_and_throw(); +} + +#define INSTANTIATE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template void init_csr_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + template void init_csr_matrix( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ + template void set_csr_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + 
sycl::buffer val); \ + template void set_csr_matrix_data( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) +FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS); +#undef INSTANTIATE_CSR_MATRIX_FUNCS + +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + const std::vector &dependencies) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + // Asynchronously release the backend's handle followed by the internal handle. + auto event = oneapi::mkl::sparse::release_matrix_handle( + queue, &internal_smhandle->backend_handle, dependencies); + return detail::submit_release(queue, internal_smhandle, event); +} + +bool set_matrix_property(sycl::queue & /*queue*/, oneapi::mkl::sparse::matrix_handle_t smhandle, + oneapi::mkl::sparse::matrix_property property) { + auto internal_smhandle = detail::get_internal_handle(smhandle); + // Store the matrix property internally for better error checking + internal_smhandle->set_matrix_property(property); + // Set the matrix property on the backend handle + // Backend and oneMKL interface types for the property don't match + switch (property) { + case oneapi::mkl::sparse::matrix_property::symmetric: + oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, + oneapi::mkl::sparse::property::symmetric); + return true; + case oneapi::mkl::sparse::matrix_property::sorted: + oneapi::mkl::sparse::set_matrix_property(internal_smhandle->backend_handle, + oneapi::mkl::sparse::property::sorted); + return true; + default: return false; + } +} diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.hpp b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp new file mode 100644 index 000000000..efadd72e7 --- /dev/null +++ 
b/src/sparse_blas/backends/mkl_common/mkl_handles.hpp @@ -0,0 +1,80 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ + +// MKLCPU and MKLGPU backends include +// This include defines its own oneapi::mkl::sparse namespace with some of the +// types that are used here: matrix_handle_t, index_base, transpose, uplo, diag. +#include + +#include "sparse_blas/generic_container.hpp" + +namespace oneapi::mkl::sparse { + +// Complete the definition of incomplete types dense_vector_handle and +// dense_matrix_handle as they don't exist in oneMKL backends yet. 
+ +struct dense_vector_handle : public detail::generic_dense_vector_handle { + template + dense_vector_handle(T* value_ptr, std::int64_t size) + : detail::generic_dense_vector_handle(nullptr, value_ptr, size) {} + + template + dense_vector_handle(const sycl::buffer value_buffer, std::int64_t size) + : detail::generic_dense_vector_handle(nullptr, value_buffer, size) {} +}; + +struct dense_matrix_handle : public detail::generic_dense_matrix_handle { + template + dense_matrix_handle(T* value_ptr, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, + layout dense_layout) + : detail::generic_dense_matrix_handle(nullptr, value_ptr, num_rows, num_cols, ld, + dense_layout) {} + + template + dense_matrix_handle(const sycl::buffer value_buffer, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : detail::generic_dense_matrix_handle(nullptr, value_buffer, num_rows, num_cols, + ld, dense_layout) {} +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::detail { + +/** + * Internal sparse_matrix_handle type for MKL backends. + * Here \p matrix_handle_t is the type of the backend's handle. + * The user-facing incomplete type matrix_handle_t must be kept incomplete. + * Internally matrix_handle_t is reinterpret_cast as + * oneapi::mkl::sparse::detail::sparse_matrix_handle which holds another + * matrix_handle_t for the backend handle. 
+ */ +using sparse_matrix_handle = detail::generic_sparse_handle; + +/// Cast to oneMKL's interface handle type +inline auto get_internal_handle(matrix_handle_t handle) { + return reinterpret_cast(handle); +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HANDLES_HPP_ diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index da5235ee0..ca15c5b4f 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -1,56 +1,111 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and
+* limitations under the License.
 *
-*
-* SPDX-License-Identifier: Apache-2.0
-*******************************************************************************/
+**************************************************************************/
 
-// MKLCPU and MKLGPU backends include
-// This include defines its own oneapi::mkl::sparse namespace with some of the types that are used here: matrix_handle_t, index_base, transpose, uolo, diag.
-#include
+#ifndef _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_
+#define _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_
 
-// Includes are set up so that oneapi::mkl::sparse namespace refers to the MKLCPU and MKLGPU backends namespace (oneMKL product)
-// in this file.
-// oneapi::mkl::sparse::detail namespace refers to the oneMKL interface namespace.
+#if __has_include(<sycl/sycl.hpp>)
+#include <sycl/sycl.hpp>
+#else
+#include <CL/sycl.hpp>
+#endif
 
+#include "oneapi/mkl/exceptions.hpp"
 #include "oneapi/mkl/sparse_blas/detail/helper_types.hpp"
 
+#include "sparse_blas/enum_data_types.hpp"
+#include "sparse_blas/macros.hpp"
+
 namespace oneapi::mkl::sparse::detail {
 
-inline auto get_handle(detail::matrix_handle **handle) {
-    return reinterpret_cast(handle);
+/// Return whether a pointer is accessible on the host
+template <typename T>
+inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_device_ptr) {
+    auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context());
+    return alloc_type == sycl::usm::alloc::host || alloc_type == sycl::usm::alloc::shared ||
+           alloc_type == sycl::usm::alloc::unknown;
 }
 
-inline auto get_handle(detail::matrix_handle *handle) {
-    return reinterpret_cast(handle);
+/// Throw an exception if the scalar is not accessible on the host
+inline void check_ptr_is_host_accessible(const std::string &function_name,
+                                         const std::string &scalar_name,
+                                         bool is_ptr_accessible_on_host) {
+    if (!is_ptr_accessible_on_host) {
+        throw 
mkl::invalid_argument( + "sparse_blas", function_name, + "Scalar " + scalar_name + " must be accessible on the host for buffer functions."); + } } +/// Return a scalar on the host from a pointer to host or device memory +/// Used for USM functions +template +inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, + bool is_ptr_accessible_on_host) { + if (is_ptr_accessible_on_host) { + return *host_or_device_ptr; + } + T scalar; + auto event = queue.copy(host_or_device_ptr, &scalar, 1); + event.wait_and_throw(); + return scalar; +} + +/// Merge multiple event dependencies into one +inline sycl::event collapse_dependencies(sycl::queue &queue, + const std::vector &dependencies) { + if (dependencies.empty()) { + return {}; + } + else if (dependencies.size() == 1) { + return dependencies[0]; + } + + return queue.submit([&](sycl::handler &cgh) { + cgh.depends_on(dependencies); + cgh.host_task([=]() {}); + }); +} + +/// Convert \p value_type to template type argument and use it to call \p op_functor. +#define DISPATCH_MKL_OPERATION(function_name, value_type, op_functor, ...) 
\ + switch (value_type) { \ + case detail::data_type::real_fp32: return op_functor(__VA_ARGS__); \ + case detail::data_type::real_fp64: return op_functor(__VA_ARGS__); \ + case detail::data_type::complex_fp32: return op_functor>(__VA_ARGS__); \ + case detail::data_type::complex_fp64: \ + return op_functor>(__VA_ARGS__); \ + default: \ + throw oneapi::mkl::exception( \ + "sparse_blas", function_name, \ + "Internal error: unsupported type " + data_type_to_str(value_type)); \ + } + +#define CHECK_DESCR_MATCH(descr, argument, optimize_func_name) \ + do { \ + if (descr->last_optimized_##argument != argument) { \ + throw mkl::invalid_argument( \ + "sparse_blas", __func__, \ + #argument " argument must match with the previous call to " #optimize_func_name); \ + } \ + } while (0) + } // namespace oneapi::mkl::sparse::detail -#define FOR_EACH_FP_TYPE(INSTANTIATE_MACRO) \ - INSTANTIATE_MACRO(float); \ - INSTANTIATE_MACRO(double); \ - INSTANTIATE_MACRO(std::complex); \ - INSTANTIATE_MACRO(std::complex) - -#define FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, INT_TYPE) \ - INSTANTIATE_MACRO(float, INT_TYPE); \ - INSTANTIATE_MACRO(double, INT_TYPE); \ - INSTANTIATE_MACRO(std::complex, INT_TYPE); \ - INSTANTIATE_MACRO(std::complex, INT_TYPE) - -#define FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_MACRO) \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int32_t); \ - FOR_EACH_FP_AND_INT_TYPE_HELPER(INSTANTIATE_MACRO, std::int64_t) +#endif // _ONEMKL_SRC_SPARSE_BLAS_BACKENDS_MKL_COMMON_MKL_HELPER_HPP_ diff --git a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx b/src/sparse_blas/backends/mkl_common/mkl_operations.cxx deleted file mode 100644 index ba6960341..000000000 --- a/src/sparse_blas/backends/mkl_common/mkl_operations.cxx +++ /dev/null @@ -1,170 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. 
-* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/, - detail::matrix_handle* /*handle*/, - const std::vector& dependencies) { - // TODO: Call to optimize_gemm with 2024.1 oneMKL release - // Return an event depending on the dependencies - return queue.submit([=](sycl::handler& cgh) { - cgh.depends_on(dependencies); - cgh.host_task([=]() { /* Empty kernel */ }); - }); -} - -sycl::event optimize_gemm(sycl::queue& queue, transpose /*transpose_A*/, transpose /*transpose_B*/, - layout /*dense_matrix_layout*/, const std::int64_t /*columns*/, - detail::matrix_handle* /*handle*/, - const std::vector& dependencies) { - // TODO: Call to optimize_gemm with 2024.1 oneMKL release - // Return an event depending on the dependencies - return queue.submit([=](sycl::handler& cgh) { - cgh.depends_on(dependencies); - cgh.host_task([=]() { /* Empty kernel */ }); - }); -} - -sycl::event optimize_gemv(sycl::queue& queue, transpose transpose_val, - detail::matrix_handle* handle, - const std::vector& dependencies) { - return oneapi::mkl::sparse::optimize_gemv(queue, transpose_val, detail::get_handle(handle), - dependencies); -} - -sycl::event optimize_trsv(sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, - detail::matrix_handle* handle, - const std::vector& dependencies) { 
- // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to optimize_trsv - if (transpose_val != transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - } - return oneapi::mkl::sparse::optimize_trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(handle), dependencies); -} - -template -std::enable_if_t> gemv( - sycl::queue& queue, transpose transpose_val, const fpType alpha, - detail::matrix_handle* A_handle, sycl::buffer& x, const fpType beta, - sycl::buffer& y) { - oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y); -} - -template -std::enable_if_t, sycl::event> gemv( - sycl::queue& queue, transpose transpose_val, const fpType alpha, - detail::matrix_handle* A_handle, const fpType* x, const fpType beta, fpType* y, - const std::vector& dependencies) { - return oneapi::mkl::sparse::gemv(queue, transpose_val, alpha, detail::get_handle(A_handle), x, beta, y, - dependencies); -} - -template -std::enable_if_t> trsv(sycl::queue& queue, uplo uplo_val, - transpose transpose_val, diag diag_val, - detail::matrix_handle* A_handle, - sycl::buffer& x, - sycl::buffer& y) { - // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to trsv - if (transpose_val != transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - } - oneapi::mkl::sparse::trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(A_handle), x, y); -} - -template -std::enable_if_t, sycl::event> trsv( - sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, - detail::matrix_handle* A_handle, const fpType* x, fpType* y, - const std::vector& dependencies) { - // TODO: Remove this if condition once Intel oneMKL adds support for trans/conjtrans to trsv - if (transpose_val != 
transpose::nontrans) { - throw mkl::unimplemented("sparse_blas/backends/mkl", __FUNCTION__, - "Transposed or conjugate trsv is not supported"); - } - // TODO: Remove const_cast in future oneMKL release - return oneapi::mkl::sparse::trsv(queue, uplo_val, transpose_val, diag_val, - detail::get_handle(A_handle), const_cast(x), y, - dependencies); -} - -template -std::enable_if_t> gemm( - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, detail::matrix_handle* A_handle, sycl::buffer& B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, - sycl::buffer& C, const std::int64_t ldc) { - oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha, - detail::get_handle(A_handle), B, columns, ldb, beta, C, ldc); -} - -template -std::enable_if_t, sycl::event> gemm( - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, transpose transpose_B, - const fpType alpha, detail::matrix_handle* A_handle, const fpType* B, - const std::int64_t columns, const std::int64_t ldb, const fpType beta, fpType* C, - const std::int64_t ldc, const std::vector& dependencies) { - // TODO: Remove const_cast in future oneMKL release - return oneapi::mkl::sparse::gemm(queue, dense_matrix_layout, transpose_A, transpose_B, alpha, - detail::get_handle(A_handle), const_cast(B), columns, - ldb, beta, C, ldc, dependencies); -} - -#define INSTANTIATE_GEMV(FP_TYPE) \ - template std::enable_if_t> gemv( \ - sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ - detail::matrix_handle* A_handle, sycl::buffer& x, const FP_TYPE beta, \ - sycl::buffer& y); \ - template std::enable_if_t, sycl::event> gemv( \ - sycl::queue& queue, transpose transpose_val, const FP_TYPE alpha, \ - detail::matrix_handle* A_handle, const FP_TYPE* x, const FP_TYPE beta, FP_TYPE* y, \ - const std::vector& dependencies) - -#define INSTANTIATE_TRSV(FP_TYPE) \ - template std::enable_if_t> trsv( \ - 
sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - detail::matrix_handle* A_handle, sycl::buffer& x, \ - sycl::buffer& y); \ - template std::enable_if_t, sycl::event> trsv( \ - sycl::queue& queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - detail::matrix_handle* A_handle, const FP_TYPE* x, FP_TYPE* y, \ - const std::vector& dependencies) - -#define INSTANTIATE_GEMM(FP_TYPE) \ - template std::enable_if_t> gemm( \ - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ - sycl::buffer& B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer& C, const std::int64_t ldc); \ - template std::enable_if_t, sycl::event> gemm( \ - sycl::queue& queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, detail::matrix_handle* A_handle, \ - const FP_TYPE* B, const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, \ - FP_TYPE* C, const std::int64_t ldc, const std::vector& dependencies) - -FOR_EACH_FP_TYPE(INSTANTIATE_GEMV); -FOR_EACH_FP_TYPE(INSTANTIATE_TRSV); -FOR_EACH_FP_TYPE(INSTANTIATE_GEMM); - -#undef INSTANTIATE_GEMV -#undef INSTANTIATE_TRSV -#undef INSTANTIATE_GEMM diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx new file mode 100644 index 000000000..dad611252 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -0,0 +1,245 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +namespace oneapi::mkl::sparse { + +struct spmm_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::transpose last_optimized_opB; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_matrix_handle_t last_optimized_B_handle; + oneapi::mkl::sparse::dense_matrix_handle_t last_optimized_C_handle; + oneapi::mkl::sparse::spmm_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { + +void init_spmm_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr) { + *p_spmm_descr = new spmm_descr(); +} + +sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies) { + return detail::submit_release(queue, spmm_descr, dependencies); +} + +void check_valid_spmm(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + THROW_IF_NULLPTR(function_name, A_handle); + THROW_IF_NULLPTR(function_name, B_handle); + 
THROW_IF_NULLPTR(function_name, C_handle);
+
+    auto internal_A_handle = detail::get_internal_handle(A_handle);
+    detail::check_all_containers_compatible(function_name, internal_A_handle, B_handle, C_handle);
+    if (internal_A_handle->all_use_buffer()) {
+        detail::check_ptr_is_host_accessible("spmm", "alpha", is_alpha_host_accessible);
+        detail::check_ptr_is_host_accessible("spmm", "beta", is_beta_host_accessible);
+    }
+    if (is_alpha_host_accessible != is_beta_host_accessible) {
+        throw mkl::invalid_argument(
+            "sparse_blas", function_name,
+            "Alpha and beta must both be placed on host memory or device memory.");
+    }
+    if (B_handle->dense_layout != C_handle->dense_layout) {
+        throw mkl::invalid_argument("sparse_blas", function_name,
+                                    "B and C matrices must use the same layout.");
+    }
+
+    if (A_view.type_view != oneapi::mkl::sparse::matrix_descr::general) {
+        throw mkl::invalid_argument("sparse_blas", function_name,
+                                    "Matrix view's type must be `matrix_descr::general`.");
+    }
+
+    if (A_view.diag_view != oneapi::mkl::diag::nonunit) {
+        throw mkl::invalid_argument("sparse_blas", function_name,
+                                    "Matrix's diag_view must be `nonunit`.");
+    }
+
+#if BACKEND == gpu
+    detail::data_type data_type = internal_A_handle->get_value_type();
+    if ((data_type == detail::data_type::complex_fp32 ||
+         data_type == detail::data_type::complex_fp64) &&
+        opA == oneapi::mkl::transpose::conjtrans &&
+        internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::symmetric)) {
+        throw mkl::unimplemented(
+            "sparse_blas", function_name,
+            "The backend does not support spmm using conjtrans and the symmetric property.");
+    }
+#else
+    (void)opA;
+#endif // BACKEND
+}
+
+void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA,
+                      oneapi::mkl::transpose /*opB*/, const void *alpha,
+                      oneapi::mkl::sparse::matrix_view A_view,
+                      oneapi::mkl::sparse::matrix_handle_t A_handle,
+                      oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta,
+                      
oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg /*alg*/, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); + temp_buffer_size = 0; + spmm_descr->buffer_size_called = true; +} + +inline void common_spmm_optimize( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm("spmm_optimize", opA, A_view, A_handle, B_handle, C_handle, + is_alpha_host_accessible, is_beta_host_accessible); + if (!spmm_descr->buffer_size_called) { + throw mkl::uninitialized("sparse_blas", "spmm_optimize", + "spmm_buffer_size must be called before spmm_optimize."); + } + spmm_descr->optimized_called = true; + spmm_descr->last_optimized_opA = opA; + spmm_descr->last_optimized_opB = opB; + spmm_descr->last_optimized_A_view = A_view; + spmm_descr->last_optimized_A_handle = A_handle; + spmm_descr->last_optimized_B_handle = B_handle; + spmm_descr->last_optimized_C_handle = C_handle; + spmm_descr->last_optimized_alg = alg; +} + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void 
*alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, + sycl::buffer /*workspace*/) { + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); + if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { + return; + } + internal_A_handle->can_be_reset = false; + // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. +} + +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void * /*workspace*/, + const std::vector &dependencies) { + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spmm_optimize(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, + spmm_descr); + if (alg == oneapi::mkl::sparse::spmm_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + internal_A_handle->can_be_reset = false; + // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. 
+ return detail::collapse_dependencies(queue, dependencies); +} + +template +sycl::event internal_spmm( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view /*A_view*/, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg /*alg*/, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, const std::vector &dependencies, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + T host_beta = + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + auto layout = B_handle->dense_layout; + auto columns = C_handle->num_cols; + auto ldb = B_handle->ld; + auto ldc = C_handle->ld; + if (internal_A_handle->all_use_buffer()) { + oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha, + internal_A_handle->backend_handle, B_handle->get_buffer(), + columns, ldb, host_beta, C_handle->get_buffer(), ldc); + // Dependencies are not used for buffers + return {}; + } + else { + return oneapi::mkl::sparse::gemm(queue, layout, opA, opB, host_alpha, + internal_A_handle->backend_handle, + B_handle->get_usm_ptr(), columns, ldb, host_beta, + C_handle->get_usm_ptr(), ldc, dependencies); + } +} + +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector 
&dependencies) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, + is_beta_host_accessible); + + if (!spmm_descr->optimized_called) { + throw mkl::uninitialized("sparse_blas", __func__, + "spmm_optimize must be called before spmm."); + } + CHECK_DESCR_MATCH(spmm_descr, opA, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, opB, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, A_view, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, A_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, B_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, C_handle, "spmm_optimize"); + CHECK_DESCR_MATCH(spmm_descr, alg, "spmm_optimize"); + + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spmm", value_type, internal_spmm, queue, opA, opB, alpha, A_view, + A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies, + is_alpha_host_accessible, is_beta_host_accessible); +} + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx new file mode 100644 index 000000000..d2332286b --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -0,0 +1,277 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. 
+* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +namespace oneapi::mkl::sparse { + +struct spmv_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_x_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_y_handle; + oneapi::mkl::sparse::spmv_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { + +void init_spmv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr) { + *p_spmv_descr = new spmv_descr(); +} + +sycl::event release_spmv_descr(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies) { + return detail::submit_release(queue, spmv_descr, dependencies); +} + +void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + THROW_IF_NULLPTR(function_name, A_handle); + THROW_IF_NULLPTR(function_name, x_handle); + THROW_IF_NULLPTR(function_name, y_handle); + + auto internal_A_handle = detail::get_internal_handle(A_handle); + detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); + if 
(internal_A_handle->all_use_buffer()) { + detail::check_ptr_is_host_accessible("spmv", "alpha", is_alpha_host_accessible); + detail::check_ptr_is_host_accessible("spmv", "beta", is_beta_host_accessible); + } + if (is_alpha_host_accessible != is_beta_host_accessible) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "Alpha and beta must both be placed on host memory or device memory."); + } + if (A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal) { + throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix view's type cannot be diagonal."); + } + + if (A_view.type_view != oneapi::mkl::sparse::matrix_descr::triangular && + A_view.diag_view == oneapi::mkl::diag::unit) { + throw mkl::invalid_argument( + "sparse_blas", function_name, + "`unit` diag_view can only be used with a triangular type_view."); + } + + if ((A_view.type_view == oneapi::mkl::sparse::matrix_descr::symmetric || + A_view.type_view == oneapi::mkl::sparse::matrix_descr::hermitian) && + opA == oneapi::mkl::transpose::conjtrans) { + throw mkl::unimplemented( + "sparse_blas", function_name, + "The backend does not support Symmetric or Hermitian matrix with `conjtrans`."); + } +} + +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg /*alg*/, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
+ bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); + temp_buffer_size = 0; + spmv_descr->buffer_size_called = true; +} + +inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv("spmv_optimize", opA, A_view, A_handle, x_handle, y_handle, + is_alpha_host_accessible, is_beta_host_accessible); + if (!spmv_descr->buffer_size_called) { + throw mkl::uninitialized("sparse_blas", "spmv_optimize", + "spmv_buffer_size must be called before spmv_optimize."); + } + spmv_descr->optimized_called = true; + spmv_descr->last_optimized_opA = opA; + spmv_descr->last_optimized_A_view = A_view; + spmv_descr->last_optimized_A_handle = A_handle; + spmv_descr->last_optimized_x_handle = x_handle; + spmv_descr->last_optimized_y_handle = y_handle; + spmv_descr->last_optimized_alg = alg; +} + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, + sycl::buffer /*workspace*/) { + auto internal_A_handle = 
detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); + if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { + return; + } + internal_A_handle->can_be_reset = false; + if (A_view.type_view == matrix_descr::triangular) { + oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + // No optimize_symv currently + return; + } + else { + oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle); + } +} + +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void * /*workspace*/, + const std::vector &dependencies) { + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spmv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, + spmv_descr); + if (alg == oneapi::mkl::sparse::spmv_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + internal_A_handle->can_be_reset = false; + if (A_view.type_view == matrix_descr::triangular) { + return oneapi::mkl::sparse::optimize_trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle, dependencies); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == 
matrix_descr::hermitian) { + return detail::collapse_dependencies(queue, dependencies); + } + else { + return oneapi::mkl::sparse::optimize_gemv(queue, opA, internal_A_handle->backend_handle, + dependencies); + } +} + +template +sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg /*alg*/, + oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, + const std::vector &dependencies, + bool is_alpha_host_accessible, bool is_beta_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + T host_beta = + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + auto backend_handle = internal_A_handle->backend_handle; + if (internal_A_handle->all_use_buffer()) { + auto x_buffer = x_handle->get_buffer(); + auto y_buffer = y_handle->get_buffer(); + if (A_view.type_view == matrix_descr::triangular) { + oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, + backend_handle, x_buffer, host_beta, y_buffer); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle, x_buffer, + host_beta, y_buffer); + } + else { + oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_buffer, host_beta, + y_buffer); + } + // Dependencies are not used for buffers + return {}; + } + else { + auto x_usm = x_handle->get_usm_ptr(); + auto y_usm = y_handle->get_usm_ptr(); + if (A_view.type_view == matrix_descr::triangular) { + return 
oneapi::mkl::sparse::trmv(queue, A_view.uplo_view, opA, A_view.diag_view, + host_alpha, backend_handle, x_usm, host_beta, y_usm, + dependencies); + } + else if (A_view.type_view == matrix_descr::symmetric || + A_view.type_view == matrix_descr::hermitian) { + return oneapi::mkl::sparse::symv(queue, A_view.uplo_view, host_alpha, backend_handle, + x_usm, host_beta, y_usm, dependencies); + } + else { + return oneapi::mkl::sparse::gemv(queue, opA, host_alpha, backend_handle, x_usm, + host_beta, y_usm, dependencies); + } + } +} + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); + check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + is_beta_host_accessible); + + if (!spmv_descr->optimized_called) { + throw mkl::uninitialized("sparse_blas", __func__, + "spmv_optimize must be called before spmv."); + } + CHECK_DESCR_MATCH(spmv_descr, opA, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, A_view, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, A_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, x_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, y_handle, "spmv_optimize"); + CHECK_DESCR_MATCH(spmv_descr, alg, "spmv_optimize"); + + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spmv", value_type, internal_spmv, queue, opA, alpha, A_view, A_handle, + x_handle, beta, y_handle, alg, spmv_descr, dependencies, + 
is_alpha_host_accessible, is_beta_host_accessible); +} + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx new file mode 100644 index 000000000..7ef5b3c39 --- /dev/null +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -0,0 +1,223 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +namespace oneapi::mkl::sparse { + +struct spsv_descr { + bool buffer_size_called = false; + bool optimized_called = false; + oneapi::mkl::transpose last_optimized_opA; + oneapi::mkl::sparse::matrix_view last_optimized_A_view; + oneapi::mkl::sparse::matrix_handle_t last_optimized_A_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_x_handle; + oneapi::mkl::sparse::dense_vector_handle_t last_optimized_y_handle; + oneapi::mkl::sparse::spsv_alg last_optimized_alg; +}; + +} // namespace oneapi::mkl::sparse + +namespace oneapi::mkl::sparse::BACKEND { + +void init_spsv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr) { + *p_spsv_descr = new spsv_descr(); +} + +sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector &dependencies) { + return detail::submit_release(queue, spsv_descr, dependencies); +} + +void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose opA, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + bool is_alpha_host_accessible, oneapi::mkl::sparse::spsv_alg alg) { + THROW_IF_NULLPTR(function_name, A_handle); + THROW_IF_NULLPTR(function_name, x_handle); + THROW_IF_NULLPTR(function_name, y_handle); + + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg && + !internal_A_handle->has_matrix_property(oneapi::mkl::sparse::matrix_property::sorted)) { + throw mkl::unimplemented( + "sparse_blas", function_name, + "The backend does not support `no_optimize_alg` unless A_handle has the property `matrix_property::sorted`."); + } + +#if BACKEND == gpu + detail::data_type data_type = internal_A_handle->get_value_type(); + if ((data_type == 
detail::data_type::complex_fp32 || + data_type == detail::data_type::complex_fp64) && + opA == oneapi::mkl::transpose::conjtrans) { + throw mkl::unimplemented("sparse_blas", function_name, + "The backend does not support spsv using conjtrans."); + } +#else + (void)opA; +#endif // BACKEND + + detail::check_all_containers_compatible(function_name, internal_A_handle, x_handle, y_handle); + if (A_view.type_view != matrix_descr::triangular) { + throw mkl::invalid_argument("sparse_blas", function_name, + "Matrix view's type must be `matrix_descr::triangular`."); + } + + if (internal_A_handle->all_use_buffer()) { + detail::check_ptr_is_host_accessible("spsv", "alpha", is_alpha_host_accessible); + } +} + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, std::size_t &temp_buffer_size) { + // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
+ bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); + temp_buffer_size = 0; + spsv_descr->buffer_size_called = true; +} + +inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv("spsv_optimize", opA, A_view, A_handle, x_handle, y_handle, + is_alpha_host_accessible, alg); + if (!spsv_descr->buffer_size_called) { + throw mkl::uninitialized("sparse_blas", "spsv_optimize", + "spsv_buffer_size must be called before spsv_optimize."); + } + spsv_descr->optimized_called = true; + spsv_descr->last_optimized_opA = opA; + spsv_descr->last_optimized_A_view = A_view; + spsv_descr->last_optimized_A_handle = A_handle; + spsv_descr->last_optimized_x_handle = x_handle; + spsv_descr->last_optimized_y_handle = y_handle; + spsv_descr->last_optimized_alg = alg; +} + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, + sycl::buffer /*workspace*/) { + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (!internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); + 
if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { + return; + } + internal_A_handle->can_be_reset = false; + oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle); +} + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void * /*workspace*/, + const std::vector &dependencies) { + auto internal_A_handle = detail::get_internal_handle(A_handle); + if (internal_A_handle->all_use_buffer()) { + detail::throw_incompatible_container(__func__); + } + common_spsv_optimize(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr); + if (alg == oneapi::mkl::sparse::spsv_alg::no_optimize_alg) { + return detail::collapse_dependencies(queue, dependencies); + } + internal_A_handle->can_be_reset = false; + return oneapi::mkl::sparse::optimize_trsv(queue, A_view.uplo_view, opA, A_view.diag_view, + internal_A_handle->backend_handle, dependencies); +} + +template +sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg /*alg*/, + oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, + const std::vector &dependencies, + bool is_alpha_host_accessible) { + T host_alpha = + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + auto internal_A_handle = detail::get_internal_handle(A_handle); + internal_A_handle->can_be_reset = false; + if (internal_A_handle->all_use_buffer()) { + 
oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, + internal_A_handle->backend_handle, x_handle->get_buffer(), + y_handle->get_buffer()); + // Dependencies are not used for buffers + return {}; + } + else { + return oneapi::mkl::sparse::trsv(queue, A_view.uplo_view, opA, A_view.diag_view, host_alpha, + internal_A_handle->backend_handle, + x_handle->get_usm_ptr(), y_handle->get_usm_ptr(), + dependencies); + } +} + +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector &dependencies) { + bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); + check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, + alg); + + if (!spsv_descr->optimized_called) { + throw mkl::uninitialized("sparse_blas", __func__, + "spsv_optimize must be called before spsv."); + } + CHECK_DESCR_MATCH(spsv_descr, opA, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, A_view, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, A_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, x_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, y_handle, "spsv_optimize"); + CHECK_DESCR_MATCH(spsv_descr, alg, "spsv_optimize"); + + auto value_type = detail::get_internal_handle(A_handle)->get_value_type(); + DISPATCH_MKL_OPERATION("spsv", value_type, internal_spsv, queue, opA, alpha, A_view, A_handle, + x_handle, y_handle, alg, spsv_descr, dependencies, + is_alpha_host_accessible); +} + +} // namespace oneapi::mkl::sparse::BACKEND diff --git a/src/sparse_blas/backends/mklcpu/CMakeLists.txt b/src/sparse_blas/backends/mklcpu/CMakeLists.txt index cfcf9cf3d..e41cae268 100644 --- 
a/src/sparse_blas/backends/mklcpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklcpu/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ include(WarningsUtils) add_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT - mklcpu_basic.cpp + mklcpu_handles.cpp mklcpu_operations.cpp $<$: mklcpu_wrappers.cpp> ) diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp deleted file mode 100644 index 9ab29ee92..000000000 --- a/src/sparse_blas/backends/mklcpu/mklcpu_basic.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" - -namespace oneapi::mkl::sparse::mklcpu { - -#include "../mkl_common/mkl_basic.cxx" - -} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp new file mode 100644 index 000000000..a6ea51629 --- /dev/null +++ b/src/sparse_blas/backends/mklcpu/mklcpu_handles.cpp @@ -0,0 +1,29 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" + +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" + +namespace oneapi::mkl::sparse::mklcpu { + +#include "sparse_blas/backends/mkl_common/mkl_handles.cxx" + +} // namespace oneapi::mkl::sparse::mklcpu diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp index e636b1816..0929a7ef4 100644 --- a/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp +++ b/src/sparse_blas/backends/mklcpu/mklcpu_operations.cpp @@ -1,28 +1,33 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
+* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ -#include "../mkl_common/mkl_helper.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/matrix_view_comparison.hpp" #include "oneapi/mkl/sparse_blas/detail/mklcpu/onemkl_sparse_blas_mklcpu.hpp" -namespace oneapi::mkl::sparse::mklcpu { +#define BACKEND mklcpu -#include "../mkl_common/mkl_operations.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" -} // namespace oneapi::mkl::sparse::mklcpu +#undef BACKEND diff --git a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp index 40f75c60c..1a6217684 100644 --- a/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp +++ b/src/sparse_blas/backends/mklcpu/mklcpu_wrappers.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/types.hpp" diff --git a/src/sparse_blas/backends/mklgpu/CMakeLists.txt b/src/sparse_blas/backends/mklgpu/CMakeLists.txt index a31794547..cd25babc2 100644 --- a/src/sparse_blas/backends/mklgpu/CMakeLists.txt +++ b/src/sparse_blas/backends/mklgpu/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -24,7 +24,7 @@ include(WarningsUtils) add_library(${LIB_NAME}) add_library(${LIB_OBJ} OBJECT - mklgpu_basic.cpp + mklgpu_handles.cpp mklgpu_operations.cpp $<$: mklgpu_wrappers.cpp> ) diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp deleted file mode 100644 index 8df24f8da..000000000 --- a/src/sparse_blas/backends/mklgpu/mklgpu_basic.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include "../mkl_common/mkl_helper.hpp" - -#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" - -namespace oneapi::mkl::sparse::mklgpu { - -#include "../mkl_common/mkl_basic.cxx" - -} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp new file mode 100644 index 000000000..7cb9853a7 --- /dev/null +++ b/src/sparse_blas/backends/mklgpu/mklgpu_handles.cpp @@ -0,0 +1,29 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+* +**************************************************************************/ + +#include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" + +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" + +namespace oneapi::mkl::sparse::mklgpu { + +#include "sparse_blas/backends/mkl_common/mkl_handles.cxx" + +} // namespace oneapi::mkl::sparse::mklgpu diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp index 439dc4eea..be5e0c0aa 100644 --- a/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp +++ b/src/sparse_blas/backends/mklgpu/mklgpu_operations.cpp @@ -1,28 +1,33 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
+* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ -#include "../mkl_common/mkl_helper.hpp" +#include "sparse_blas/backends/mkl_common/mkl_handles.hpp" +#include "sparse_blas/backends/mkl_common/mkl_helper.hpp" +#include "sparse_blas/macros.hpp" +#include "sparse_blas/matrix_view_comparison.hpp" #include "oneapi/mkl/sparse_blas/detail/mklgpu/onemkl_sparse_blas_mklgpu.hpp" -namespace oneapi::mkl::sparse::mklgpu { +#define BACKEND mklgpu -#include "../mkl_common/mkl_operations.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spmm.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spmv.cxx" +#include "sparse_blas/backends/mkl_common/mkl_spsv.cxx" -} // namespace oneapi::mkl::sparse::mklgpu +#undef BACKEND diff --git a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp index 346b13540..4a261f64e 100644 --- a/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp +++ b/src/sparse_blas/backends/mklgpu/mklgpu_wrappers.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. * -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/types.hpp" diff --git a/src/sparse_blas/enum_data_types.hpp b/src/sparse_blas/enum_data_types.hpp new file mode 100644 index 000000000..26946facb --- /dev/null +++ b/src/sparse_blas/enum_data_types.hpp @@ -0,0 +1,69 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. 
+* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ + +#include + +namespace oneapi::mkl::sparse::detail { + +enum data_type { none, int32, int64, real_fp32, real_fp64, complex_fp32, complex_fp64 }; + +inline std::string data_type_to_str(data_type data_type) { + switch (data_type) { + case none: return "none"; + case int32: return "int32"; + case int64: return "int64"; + case real_fp32: return "real_fp32"; + case real_fp64: return "real_fp64"; + case complex_fp32: return "complex_fp32"; + case complex_fp64: return "complex_fp64"; + default: return "unknown"; + } +} + +template +constexpr data_type get_data_type() { + if constexpr (std::is_same_v) { + return data_type::int32; + } + else if constexpr (std::is_same_v) { + return data_type::int64; + } + else if constexpr (std::is_same_v) { + return data_type::real_fp32; + } + else if constexpr (std::is_same_v) { + return data_type::real_fp64; + } + else if constexpr (std::is_same_v>) { + return data_type::complex_fp32; + } + else if constexpr (std::is_same_v>) { + return data_type::complex_fp64; + } + else { + static_assert(false, "Unsupported type"); + } +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_ENUM_DATA_TYPES_HPP_ diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index 57279fb3f..d1e3d8189 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* 
Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #ifndef _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ #define _ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ @@ -23,87 +23,245 @@ #include "oneapi/mkl/sparse_blas/types.hpp" #include "sparse_blas/macros.hpp" -#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ - void (*set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, \ - sycl::buffer & row_ptr, sycl::buffer & col_ind, \ - sycl::buffer & val); \ - sycl::event (*set_csr_data_usm##FP_SUFFIX##INT_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, oneapi::mkl::index_base index, INT_TYPE * row_ptr, \ - INT_TYPE * col_ind, FP_TYPE * val, const std::vector &dependencies) - -#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ - void (*gemv_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ - oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, \ - const FP_TYPE beta, sycl::buffer &y); \ - sycl::event (*gemv_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::transpose transpose_val, const FP_TYPE alpha, \ - oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, \ - FP_TYPE *y, const std::vector &dependencies) - -#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ - void (*trsv_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, \ - oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - sycl::buffer & x, sycl::buffer & y); \ - sycl::event (*trsv_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::uplo uplo_val, 
oneapi::mkl::transpose transpose_val, \ - oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - const FP_TYPE *x, FP_TYPE *y, const std::vector &dependencies) - -#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ - void (*gemm_buffer##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ - const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, \ - sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc); \ - sycl::event (*gemm_usm##FP_SUFFIX)( \ - sycl::queue & queue, oneapi::mkl::layout dense_matrix_layout, \ - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, \ - const FP_TYPE alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, const FP_TYPE *B, \ - const std::int64_t columns, const std::int64_t ldb, const FP_TYPE beta, FP_TYPE *C, \ - const std::int64_t ldc, const std::vector &dependencies) +// Dense vector +#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_vector_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, sycl::buffer val); \ + void (*init_dense_vector_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ + std::int64_t size, FP_TYPE * val); \ + void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val); \ + void (*set_dense_vector_data_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE * val) + +// Dense matrix +#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + void (*init_dense_matrix_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + 
std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + void (*init_dense_matrix_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE * val); \ + void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, sycl::buffer val); \ + void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + oneapi::mkl::layout dense_layout, FP_TYPE * val) + +// COO matrix +#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ind, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ + void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val); \ + void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, 
std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) + +// CSR matrix +#define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + void (*init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, sycl::buffer row_ptr, \ + sycl::buffer col_ind, sycl::buffer val); \ + void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ + oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ + void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val); \ + void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ + sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ + INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) typedef struct { int version; - void (*init_matrix_handle)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_handle); - sycl::event (*release_matrix_handle)(sycl::queue &queue, - oneapi::mkl::sparse::matrix_handle_t *p_handle, + // Dense vector + FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); + sycl::event (*release_dense_vector)(sycl::queue &queue, + oneapi::mkl::sparse::dense_vector_handle_t dvhandle, + const std::vector &dependencies); + + // Dense matrix + FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); + sycl::event 
(*release_dense_matrix)(sycl::queue &queue, + oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, + const std::vector &dependencies); + + // COO matrix + FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); + + // CSR matrix + FOR_EACH_FP_AND_INT_TYPE(DEFINE_CSR_MATRIX_FUNCS); + + // Common sparse matrix functions + sycl::event (*release_sparse_matrix)(sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t smhandle, const std::vector &dependencies); - FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA); - - // optimize_* - sycl::event (*optimize_gemm_v1)(sycl::queue &queue, oneapi::mkl::transpose transpose_A, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_gemm_v2)(sycl::queue &queue, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, - oneapi::mkl::layout dense_matrix_layout, - const std::int64_t columns, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_gemv)(sycl::queue &queue, oneapi::mkl::transpose transpose_val, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - sycl::event (*optimize_trsv)(sycl::queue &queue, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies); - - FOR_EACH_FP_TYPE(DEFINE_GEMV); - FOR_EACH_FP_TYPE(DEFINE_TRSV); - FOR_EACH_FP_TYPE(DEFINE_GEMM); + bool (*set_matrix_property)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + oneapi::mkl::sparse::matrix_property property); + + // SPMM + void (*init_spmm_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr); + + sycl::event (*release_spmm_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies); + + void (*spmm_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void 
*alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size); + + void (*spmm_optimize_buffer)( + sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer workspace); + + sycl::event (*spmm_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, + const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spmm)(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t C_handle, + oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector &dependencies); + + // SPMV + void (*init_spmv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr); + + sycl::event (*release_spmv_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector 
&dependencies); + + void (*spmv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + std::size_t &temp_buffer_size); + + void (*spmv_optimize_buffer)( + sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer workspace); + + sycl::event (*spmv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spmv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spmv_alg alg, + oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector &dependencies); + + // SPSV + void (*init_spsv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr); + + sycl::event (*release_spsv_descr)(sycl::queue &queue, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const 
std::vector &dependencies); + + void (*spsv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size); + + void (*spsv_optimize_buffer)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + sycl::buffer workspace); + + sycl::event (*spsv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, + const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void *workspace, + const std::vector &dependencies); + + sycl::event (*spsv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + oneapi::mkl::sparse::matrix_view A_view, + oneapi::mkl::sparse::matrix_handle_t A_handle, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, + oneapi::mkl::sparse::dense_vector_handle_t y_handle, + oneapi::mkl::sparse::spsv_alg alg, + oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector &dependencies); } sparse_blas_function_table_t; -#undef DEFINE_SET_CSR_DATA -#undef DEFINE_GEMV -#undef DEFINE_TRSV -#undef DEFINE_GEMM +#undef DEFINE_DENSE_VECTOR_FUNCS +#undef DEFINE_DENSE_MATRIX_FUNCS +#undef DEFINE_COO_MATRIX_FUNCS +#undef DEFINE_CSR_MATRIX_FUNCS #endif // 
_ONEMKL_SPARSE_BLAS_FUNCTION_TABLE_HPP_ diff --git a/src/sparse_blas/generic_container.hpp b/src/sparse_blas/generic_container.hpp new file mode 100644 index 000000000..53bd50837 --- /dev/null +++ b/src/sparse_blas/generic_container.hpp @@ -0,0 +1,334 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ + +#include +#include + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl/sparse_blas/types.hpp" +#include "enum_data_types.hpp" + +namespace oneapi::mkl::sparse::detail { + +/// Represent a non-templated container for USM or buffer. +struct generic_container { + // USM pointer, nullptr if the provided data is a buffer. + void* usm_ptr; + + // Buffer pointer, nullptr if the provided data is a USM pointer. + // The buffer is needed to properly handle the dependencies when the handle is used. + // Use a void* type for the buffer to avoid using template arguments in every function using data handles. + // Using reinterpret does not solve the issue as the returned buffer needs the type of the original buffer for the aligned_allocator. 
+ std::shared_ptr buffer_ptr; + + // Underlying USM or buffer data type + data_type data_type; + + generic_container() : usm_ptr(nullptr), buffer_ptr(), data_type(data_type::none) {} + + template + generic_container(T* ptr) : usm_ptr(ptr), + buffer_ptr(), + data_type(get_data_type()) {} + + template + generic_container(const sycl::buffer buffer) + : usm_ptr(nullptr), + buffer_ptr(std::make_shared>(buffer)), + data_type(get_data_type()) {} + + template + void set_usm_ptr(T* ptr) { + usm_ptr = ptr; + data_type = get_data_type(); + } + + template + void set_buffer_untyped(const sycl::buffer buffer) { + buffer_ptr = std::make_shared>(buffer); + // Do not set data_type if T is meant as a generic byte type. + } + + template + void set_buffer(const sycl::buffer buffer) { + set_buffer_untyped(buffer); + data_type = get_data_type(); + } + + template + T* get_usm_ptr() { + return static_cast(usm_ptr); + } + + template + auto& get_buffer() { + return *reinterpret_cast*>(buffer_ptr.get()); + } +}; + +/// Common type for dense vector and matrix handles +template +struct generic_dense_handle { + BackendHandleT backend_handle; + + generic_container value_container; + + template + generic_dense_handle(BackendHandleT backend_handle, T* value_ptr) + : backend_handle(backend_handle), + value_container(generic_container(value_ptr)) {} + + template + generic_dense_handle(BackendHandleT backend_handle, const sycl::buffer value_buffer) + : backend_handle(backend_handle), + value_container(value_buffer) {} + + bool all_use_buffer() const { + return static_cast(value_container.buffer_ptr); + } + + data_type get_value_type() const { + return value_container.data_type; + } + + data_type get_int_type() const { + return data_type::none; + } + + template + T* get_usm_ptr() { + return value_container.get_usm_ptr(); + } + + template + auto& get_buffer() { + return value_container.get_buffer(); + } + + template + void set_usm_ptr(T* ptr) { + value_container.set_usm_ptr(ptr); + } + + template + 
void set_buffer(const sycl::buffer buffer) { + value_container.set_buffer(buffer); + } +}; + +/// Generic dense_vector_handle used by all backends +template +struct generic_dense_vector_handle : public detail::generic_dense_handle { + std::int64_t size; + + template + generic_dense_vector_handle(BackendHandleT backend_handle, T* value_ptr, std::int64_t size) + : generic_dense_handle(backend_handle, value_ptr), + size(size) {} + + template + generic_dense_vector_handle(BackendHandleT backend_handle, + const sycl::buffer value_buffer, std::int64_t size) + : generic_dense_handle(backend_handle, value_buffer), + size(size) { + if (value_buffer.size() < static_cast(size)) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", "init_dense_vector", + "Buffer size too small, expected at least " + std::to_string(size) + " but got " + + std::to_string(value_buffer.size()) + " elements."); + } + } +}; + +/// Generic dense_matrix_handle used by all backends +template +struct generic_dense_matrix_handle : public detail::generic_dense_handle { + std::int64_t num_rows; + std::int64_t num_cols; + std::int64_t ld; + oneapi::mkl::layout dense_layout; + + template + generic_dense_matrix_handle(BackendHandleT backend_handle, T* value_ptr, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : generic_dense_handle(backend_handle, value_ptr), + num_rows(num_rows), + num_cols(num_cols), + ld(ld), + dense_layout(dense_layout) {} + + template + generic_dense_matrix_handle(BackendHandleT backend_handle, + const sycl::buffer value_buffer, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout) + : generic_dense_handle(backend_handle, value_buffer), + num_rows(num_rows), + num_cols(num_cols), + ld(ld), + dense_layout(dense_layout) { + std::size_t minimum_size = static_cast( + (dense_layout == oneapi::mkl::layout::row_major ? 
num_rows : num_cols) * ld); + if (value_buffer.size() < minimum_size) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", "init_dense_matrix", + "Buffer size too small, expected at least " + std::to_string(minimum_size) + + " but got " + std::to_string(value_buffer.size()) + " elements."); + } + } +}; + +/// Generic sparse_matrix_handle used by all backends +template +struct generic_sparse_handle { + BackendHandleT backend_handle; + + generic_container row_container; + generic_container col_container; + generic_container value_container; + + std::int32_t properties_mask; + bool can_be_reset; + + template + generic_sparse_handle(BackendHandleT backend_handle, intType* row_ptr, intType* col_ptr, + fpType* value_ptr) + : backend_handle(backend_handle), + row_container(generic_container(row_ptr)), + col_container(generic_container(col_ptr)), + value_container(generic_container(value_ptr)), + properties_mask(0), + can_be_reset(true) {} + + template + generic_sparse_handle(BackendHandleT backend_handle, const sycl::buffer row_buffer, + const sycl::buffer col_buffer, + const sycl::buffer value_buffer) + : backend_handle(backend_handle), + row_container(row_buffer), + col_container(col_buffer), + value_container(value_buffer), + properties_mask(0), + can_be_reset(true) {} + + bool all_use_buffer() const { + return static_cast(value_container.buffer_ptr) && + static_cast(row_container.buffer_ptr) && + static_cast(col_container.buffer_ptr); + } + + data_type get_value_type() const { + return value_container.data_type; + } + + data_type get_int_type() const { + return row_container.data_type; + } + + void set_matrix_property(oneapi::mkl::sparse::matrix_property property) { + properties_mask |= matrix_property_to_mask(property); + } + + bool has_matrix_property(oneapi::mkl::sparse::matrix_property property) { + return properties_mask & matrix_property_to_mask(property); + } + +private: + std::int32_t matrix_property_to_mask(oneapi::mkl::sparse::matrix_property property) 
{ + switch (property) { + case oneapi::mkl::sparse::matrix_property::symmetric: return 1 << 0; + case oneapi::mkl::sparse::matrix_property::sorted: return 1 << 1; + default: + throw oneapi::mkl::invalid_argument( + "sparse_blas", "set_matrix_property", + "Unsupported matrix property " + std::to_string(static_cast(property))); + } + } +}; + +inline void throw_incompatible_container(const std::string& function_name) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible container types. All inputs and outputs must use the same container: buffer or USM"); +} + +/** + * Check that all internal containers use the same container. +*/ +template +void check_all_containers_use_buffers(const std::string& function_name, + ContainerT first_internal_container, + Ts... internal_containers) { + bool first_use_buffer = first_internal_container->all_use_buffer(); + for (const auto internal_container : { internal_containers... }) { + if (internal_container->all_use_buffer() != first_use_buffer) { + throw_incompatible_container(function_name); + } + } +} + +/** + * Check that all internal containers use the same container type, data type and integer type. + * The integer type can be 'none' if the internal container does not store any integer (i.e. for dense handles). + * The first internal container is used to determine what container and types the other internal containers should use. +*/ +template +void check_all_containers_compatible(const std::string& function_name, + ContainerT first_internal_container, + Ts... internal_containers) { + check_all_containers_use_buffers(function_name, first_internal_container, + internal_containers...); + data_type first_value_type = first_internal_container->get_value_type(); + data_type first_int_type = first_internal_container->get_int_type(); + for (const auto internal_container : { internal_containers... 
}) { + const data_type other_value_type = internal_container->get_value_type(); + if (other_value_type != first_value_type) { + throw oneapi::mkl::invalid_argument( + "sparse_blas", function_name, + "Incompatible data types expected " + data_type_to_str(first_value_type) + + " but got " + data_type_to_str(other_value_type)); + } + const data_type other_int_type = internal_container->get_int_type(); + if (other_int_type != data_type::none && other_int_type != first_int_type) { + throw oneapi::mkl::invalid_argument("sparse_blas", function_name, + "Incompatible integer types expected " + + data_type_to_str(first_int_type) + " but got " + + data_type_to_str(other_int_type)); + } + } +} + +template +sycl::event submit_release(sycl::queue& queue, T* ptr, const DependenciesT& dependencies) { + return queue.submit([&](sycl::handler& cgh) { + cgh.depends_on(dependencies); + cgh.host_task([=]() { delete ptr; }); + }); +} + +} // namespace oneapi::mkl::sparse::detail + +#endif // _ONEMKL_SRC_SPARSE_BLAS_GENERIC_CONTAINER_HPP_ diff --git a/src/sparse_blas/macros.hpp b/src/sparse_blas/macros.hpp index a4ef88e35..7eba01390 100644 --- a/src/sparse_blas/macros.hpp +++ b/src/sparse_blas/macros.hpp @@ -36,4 +36,10 @@ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int32_t, _i32); \ FOR_EACH_FP_AND_INT_TYPE_HELPER(DEFINE_MACRO, std::int64_t, _i64) +#define THROW_IF_NULLPTR(FUNC_NAME, PTR) \ + if (!(PTR)) { \ + throw mkl::uninitialized("sparse_blas", FUNC_NAME, \ + std::string(#PTR) + " must not be nullptr."); \ + } + #endif // _ONEMKL_SPARSE_BLAS_MACROS_HPP_ diff --git a/src/sparse_blas/matrix_view_comparison.hpp b/src/sparse_blas/matrix_view_comparison.hpp new file mode 100644 index 000000000..e01be7311 --- /dev/null +++ b/src/sparse_blas/matrix_view_comparison.hpp @@ -0,0 +1,36 @@ +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may 
not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ + +#ifndef _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ +#define _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ + +#include "oneapi/mkl/sparse_blas/matrix_view.hpp" + +inline bool operator==(const oneapi::mkl::sparse::matrix_view& lhs, + const oneapi::mkl::sparse::matrix_view& rhs) { + return lhs.type_view == rhs.type_view && lhs.uplo_view == rhs.uplo_view && + lhs.diag_view == rhs.diag_view; +} + +inline bool operator!=(const oneapi::mkl::sparse::matrix_view& lhs, + const oneapi::mkl::sparse::matrix_view& rhs) { + return !(lhs == rhs); +} + +#endif // _ONEMKL_SRC_SPARSE_BLAS_MATRIX_VIEW_COMPARISON_HPP_ \ No newline at end of file diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp index 95da6df9c..cdc3ae6b2 100644 --- a/src/sparse_blas/sparse_blas_loader.cpp +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -1,21 +1,21 @@ -/******************************************************************************* -* Copyright 2023 Codeplay Software Ltd. +/*************************************************************************** +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at * -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 * -* http://www.apache.org/licenses/LICENSE-2.0 +* For your convenience, a copy of the License has been included in this +* repository. * -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ +**************************************************************************/ #include "oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp" @@ -30,133 +30,326 @@ static oneapi::mkl::detail::table_initializer function_tables; -void init_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle) { +// Dense vector +#define DEFINE_DENSE_VECTOR_FUNCS(FP_TYPE, FP_SUFFIX) \ + template <> \ + void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + std::int64_t size, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_vector_buffer##FP_SUFFIX(queue, p_dvhandle, size, val); \ + } \ + template <> \ + void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + std::int64_t size, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_vector_usm##FP_SUFFIX(queue, p_dvhandle, size, val); \ + } \ + template <> \ + void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_vector_data_buffer##FP_SUFFIX(queue, dvhandle, size, \ + val); \ + } \ + template <> \ + void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_vector_data_usm##FP_SUFFIX(queue, dvhandle, size, val); \ + } +FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); +#undef DEFINE_DENSE_VECTOR_FUNCS + +sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - function_tables[libkey].init_matrix_handle(queue, p_handle); + return function_tables[libkey].release_dense_vector(queue, dvhandle, 
dependencies); } -sycl::event release_matrix_handle(sycl::queue &queue, matrix_handle_t *p_handle, - const std::vector &dependencies) { +// Dense matrix +#define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ + template <> \ + void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_matrix_buffer##FP_SUFFIX( \ + queue, p_dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_dense_matrix_usm##FP_SUFFIX(queue, p_dmhandle, num_rows, \ + num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_matrix_data_buffer##FP_SUFFIX( \ + queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } \ + template <> \ + void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ + layout dense_layout, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_dense_matrix_data_usm##FP_SUFFIX( \ + queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ + } +FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); +#undef DEFINE_DENSE_MATRIX_FUNCS + +sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - 
return function_tables[libkey].release_matrix_handle(queue, p_handle, dependencies); + return function_tables[libkey].release_dense_matrix(queue, dmhandle, dependencies); } -#define DEFINE_SET_CSR_DATA(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ +// COO matrix +#define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template <> \ + void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_coo_matrix_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ + } \ + template <> \ + void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ + } \ template <> \ - void set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, index_base index, \ - sycl::buffer &row_ptr, sycl::buffer &col_ind, \ - sycl::buffer &val) { \ + void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ind, sycl::buffer col_ind, \ + sycl::buffer val) { \ auto libkey = get_device_id(queue); \ - function_tables[libkey].set_csr_data_buffer##FP_SUFFIX##INT_SUFFIX( \ - queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + function_tables[libkey].set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, 
row_ind, col_ind, val); \ } \ template <> \ - sycl::event set_csr_data(sycl::queue &queue, matrix_handle_t handle, INT_TYPE num_rows, \ - INT_TYPE num_cols, INT_TYPE nnz, index_base index, INT_TYPE *row_ptr, \ - INT_TYPE *col_ind, FP_TYPE *val, \ - const std::vector &dependencies) { \ + void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ auto libkey = get_device_id(queue); \ - return function_tables[libkey].set_csr_data_usm##FP_SUFFIX##INT_SUFFIX( \ - queue, handle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val, dependencies); \ + function_tables[libkey].set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } +FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); +#undef DEFINE_COO_MATRIX_FUNCS -FOR_EACH_FP_AND_INT_TYPE(DEFINE_SET_CSR_DATA) -#undef DEFINE_SET_CSR_DATA +// CSR matrix +#define DEFINE_INIT_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ + template <> \ + void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_csr_matrix_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + template <> \ + void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + 
template <> \ + void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + sycl::buffer row_ptr, sycl::buffer col_ind, \ + sycl::buffer val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } \ + template <> \ + void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + std::int64_t num_cols, std::int64_t nnz, index_base index, \ + INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + auto libkey = get_device_id(queue); \ + function_tables[libkey].set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ + queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ + } +FOR_EACH_FP_AND_INT_TYPE(DEFINE_INIT_CSR_MATRIX_FUNCS); +#undef DEFINE_INIT_CSR_MATRIX_FUNCS -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, matrix_handle_t handle, - const std::vector &dependencies) { +// Common sparse matrix functions +sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_gemm_v1(queue, transpose_A, handle, dependencies); + return function_tables[libkey].release_sparse_matrix(queue, smhandle, dependencies); } -sycl::event optimize_gemm(sycl::queue &queue, transpose transpose_A, transpose transpose_B, - layout dense_matrix_layout, const std::int64_t columns, - matrix_handle_t handle, const std::vector &dependencies) { +bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_gemm_v2( - queue, transpose_A, transpose_B, dense_matrix_layout, columns, handle, dependencies); + return 
function_tables[libkey].set_matrix_property(queue, smhandle, property); } -sycl::event optimize_gemv(sycl::queue &queue, transpose transpose_val, matrix_handle_t handle, +// SPMM +void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr) { + auto libkey = get_device_id(queue); + function_tables[libkey].init_spmm_descr(queue, p_spmm_descr); +} + +sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spmm_descr(queue, spmm_descr, dependencies); +} + +void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, + std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmm_buffer_size(queue, opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, temp_buffer_size); +} + +void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmm_optimize_buffer(queue, opA, opB, alpha, A_view, A_handle, B_handle, + beta, C_handle, alg, spmm_descr, workspace); +} + +sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, + const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void *workspace, const std::vector &dependencies) { auto libkey = 
get_device_id(queue); - return function_tables[libkey].optimize_gemv(queue, transpose_val, handle, dependencies); + return function_tables[libkey].spmm_optimize_usm(queue, opA, opB, alpha, A_view, A_handle, + B_handle, beta, C_handle, alg, spmm_descr, + workspace, dependencies); } -sycl::event optimize_trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, - matrix_handle_t handle, const std::vector &dependencies) { +sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, + spmm_alg alg, spmm_descr_t spmm_descr, + const std::vector &dependencies) { auto libkey = get_device_id(queue); - return function_tables[libkey].optimize_trsv(queue, uplo_val, transpose_val, diag_val, handle, - dependencies); + return function_tables[libkey].spmm(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, + C_handle, alg, spmm_descr, dependencies); } -#define DEFINE_GEMV(FP_TYPE, FP_SUFFIX) \ - template <> \ - void gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ - matrix_handle_t A_handle, sycl::buffer &x, const FP_TYPE beta, \ - sycl::buffer &y) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].gemv_buffer##FP_SUFFIX(queue, transpose_val, alpha, A_handle, x, \ - beta, y); \ - } \ - template <> \ - sycl::event gemv(sycl::queue &queue, transpose transpose_val, const FP_TYPE alpha, \ - matrix_handle_t A_handle, const FP_TYPE *x, const FP_TYPE beta, FP_TYPE *y, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].gemv_usm##FP_SUFFIX(queue, transpose_val, alpha, A_handle, \ - x, beta, y, dependencies); \ - } +// SPMV +void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr) { + auto libkey = get_device_id(queue); + 
function_tables[libkey].init_spmv_descr(queue, p_spmv_descr); +} -FOR_EACH_FP_TYPE(DEFINE_GEMV) -#undef DEFINE_GEMV - -#define DEFINE_TRSV(FP_TYPE, FP_SUFFIX) \ - template <> \ - void trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - matrix_handle_t A_handle, sycl::buffer &x, \ - sycl::buffer &y) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].trsv_buffer##FP_SUFFIX(queue, uplo_val, transpose_val, diag_val, \ - A_handle, x, y); \ - } \ - template <> \ - sycl::event trsv(sycl::queue &queue, uplo uplo_val, transpose transpose_val, diag diag_val, \ - matrix_handle_t A_handle, const FP_TYPE *x, FP_TYPE *y, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].trsv_usm##FP_SUFFIX( \ - queue, uplo_val, transpose_val, diag_val, A_handle, x, y, dependencies); \ - } +sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spmv_descr(queue, spmv_descr, dependencies); +} -FOR_EACH_FP_TYPE(DEFINE_TRSV) -#undef DEFINE_TRSV - -#define DEFINE_GEMM(FP_TYPE, FP_SUFFIX) \ - template <> \ - void gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ - sycl::buffer &B, const std::int64_t columns, const std::int64_t ldb, \ - const FP_TYPE beta, sycl::buffer &C, const std::int64_t ldc) { \ - auto libkey = get_device_id(queue); \ - function_tables[libkey].gemm_buffer##FP_SUFFIX(queue, dense_matrix_layout, transpose_A, \ - transpose_B, alpha, A_handle, B, columns, \ - ldb, beta, C, ldc); \ - } \ - template <> \ - sycl::event gemm(sycl::queue &queue, layout dense_matrix_layout, transpose transpose_A, \ - transpose transpose_B, const FP_TYPE alpha, matrix_handle_t A_handle, \ - const FP_TYPE *B, const std::int64_t columns, const std::int64_t ldb, \ 
- const FP_TYPE beta, FP_TYPE *C, const std::int64_t ldc, \ - const std::vector &dependencies) { \ - auto libkey = get_device_id(queue); \ - return function_tables[libkey].gemm_usm##FP_SUFFIX( \ - queue, dense_matrix_layout, transpose_A, transpose_B, alpha, A_handle, B, columns, \ - ldb, beta, C, ldc, dependencies); \ - } +void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, temp_buffer_size); +} + +void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spmv_optimize_buffer(queue, opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace); +} -FOR_EACH_FP_TYPE(DEFINE_GEMM) -#undef DEFINE_GEMM +sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, + void *workspace, const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spmv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, + beta, y_handle, alg, spmv_descr, workspace, + dependencies); +} + +sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, 
dense_vector_handle_t x_handle, + const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spmv(queue, opA, alpha, A_view, A_handle, x_handle, beta, + y_handle, alg, spmv_descr, dependencies); +} + +// SPSV +void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr) { + auto libkey = get_device_id(queue); + function_tables[libkey].init_spsv_descr(queue, p_spsv_descr); +} + +sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].release_spsv_descr(queue, spsv_descr, dependencies); +} + +void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + std::size_t &temp_buffer_size) { + auto libkey = get_device_id(queue); + function_tables[libkey].spsv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, temp_buffer_size); +} + +void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + sycl::buffer workspace) { + auto libkey = get_device_id(queue); + function_tables[libkey].spsv_optimize_buffer(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace); +} + +sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, + spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, + const std::vector &dependencies) { + auto libkey = 
get_device_id(queue); + return function_tables[libkey].spsv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, + y_handle, alg, spsv_descr, workspace, + dependencies); +} + +sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, + dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, + const std::vector &dependencies) { + auto libkey = get_device_id(queue); + return function_tables[libkey].spsv(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, + alg, spsv_descr, dependencies); +} } // namespace oneapi::mkl::sparse diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp index 7e0024195..ad215761f 100644 --- a/tests/unit_tests/include/test_helper.hpp +++ b/tests/unit_tests/include/test_helper.hpp @@ -203,7 +203,7 @@ TEST_RUN_PORTFFT_SELECT_NO_ARGS(q, func); \ } while (0); -#define TEST_RUN_CT_SELECT(q, func, ...) \ +#define TEST_RUN_BLAS_CT_SELECT(q, func, ...) \ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -214,21 +214,15 @@ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_AMDGPU_ROCFFT_SELECT(q, func, __VA_ARGS__); \ } \ } \ TEST_RUN_PORTBLAS_SELECT(q, func, __VA_ARGS__); \ - TEST_RUN_PORTFFT_SELECT(q, func, __VA_ARGS__); \ } while (0); -#define TEST_RUN_BLAS_CT_SELECT(q, func, ...) \ +#define TEST_RUN_RNG_CT_SELECT(q, func, ...) 
\ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -238,16 +232,15 @@ if (vendor_id == INTEL_ID) \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CUBLAS_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCBLAS_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ } \ } \ - TEST_RUN_PORTBLAS_SELECT(q, func, __VA_ARGS__); \ } while (0); -#define TEST_RUN_RNG_CT_SELECT(q, func, ...) \ +#define TEST_RUN_LAPACK_CT_SELECT(q, func, ...) \ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ @@ -257,28 +250,23 @@ if (vendor_id == INTEL_ID) \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CURAND_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCRAND_SELECT(q, func, __VA_ARGS__); \ + TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ } \ } while (0); -#define TEST_RUN_LAPACK_CT_SELECT(q, func, ...) \ +#define TEST_RUN_SPARSE_CT_SELECT(q, func, ...) 
\ do { \ if (CHECK_HOST_OR_CPU(q)) \ TEST_RUN_INTELCPU_SELECT(q, func, __VA_ARGS__); \ else if (q.get_device().is_gpu()) { \ unsigned int vendor_id = static_cast( \ q.get_device().get_info()); \ - if (vendor_id == INTEL_ID) \ + if (vendor_id == INTEL_ID) { \ TEST_RUN_INTELGPU_SELECT(q, func, __VA_ARGS__); \ - else if (vendor_id == NVIDIA_ID) { \ - TEST_RUN_NVIDIAGPU_CUSOLVER_SELECT(q, func, __VA_ARGS__); \ - } \ - else if (vendor_id == AMD_ID) { \ - TEST_RUN_AMDGPU_ROCSOLVER_SELECT(q, func, __VA_ARGS__); \ } \ } \ } while (0); diff --git a/tests/unit_tests/sparse_blas/CMakeLists.txt b/tests/unit_tests/sparse_blas/CMakeLists.txt index 2c46cd38c..65e12c981 100644 --- a/tests/unit_tests/sparse_blas/CMakeLists.txt +++ b/tests/unit_tests/sparse_blas/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp new file mode 100644 index 000000000..62b213100 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -0,0 +1,238 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _COMMON_SPARSE_REFERENCE_HPP__ +#define _COMMON_SPARSE_REFERENCE_HPP__ + +#include +#include +#include + +#include "oneapi/mkl.hpp" + +#include "test_common.hpp" + +template +inline T conjugate(T) { + static_assert(false, "Unsupported type"); +} +template <> +inline float conjugate(float t) { + return t; +} +template <> +inline double conjugate(double t) { + return t; +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} +template <> +inline std::complex conjugate(std::complex t) { + return std::conj(t); +} + +template +inline T opVal(const T t, const bool isConj) { + return (isConj ? conjugate(t) : t); +}; + +template +void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, + intType a_nrows, intType a_ncols, intType indexing, accIntType &ia, + accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { + const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); + + // initialize ia_t to zero + for (intType i = 0; i < a_ncols + 1; ++i) { + ia_t[i] = 0; + } + + // fill ia_t with counts of columns + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - indexing; + const intType en = ia[i + 1] - indexing; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - indexing; + ia_t[col + 1]++; + } + } + // prefix sum to get official ia_t counts + ia_t[0] = indexing; + for (intType i = 0; i < a_ncols; ++i) { + ia_t[i + 1] += ia_t[i]; + } + + // second pass through data to fill transpose structure + for (intType i = 0; i < a_nrows; ++i) { + const intType st = ia[i] - indexing; + const intType en = ia[i + 1] - indexing; + for (intType j = st; j < en; ++j) { + const intType col = ja[j] - indexing; + const intType 
j_in_a_t = ia_t[col] - indexing; + ia_t[col]++; + ja_t[j_in_a_t] = i + indexing; + if (!structOnlyFlag) { + const fpType val = a[j]; + a_t[j_in_a_t] = opVal(val, isConj); + } + } + } + + // adjust ia_t back to original state after filling structure + for (intType i = a_ncols; i > 0; --i) { + ia_t[i] = ia_t[i - 1]; + } + ia_t[0] = indexing; +} + +// Transpose the given sparse matrix if needed +template +auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, + intType a_nrows, intType a_ncols, std::size_t nnz, intType indexing, + oneapi::mkl::transpose transpose_val) { + std::vector iopa; + std::vector jopa; + std::vector opa; + if (transpose_val == oneapi::mkl::transpose::nontrans) { + iopa.assign(ia, ia + a_nrows + 1); + jopa.assign(ja, ja + nnz); + opa.assign(a, a + nnz); + } + else if (transpose_val == oneapi::mkl::transpose::trans || + transpose_val == oneapi::mkl::transpose::conjtrans) { + iopa.resize(static_cast(a_ncols + 1)); + jopa.resize(nnz); + opa.resize(nnz); + do_csr_transpose(transpose_val, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, + indexing, ia, ja, a); + } + else { + throw std::runtime_error("unsupported transpose_val=" + + std::to_string(static_cast(transpose_val))); + } + return std::make_tuple(iopa, jopa, opa); +} + +/// Reduce the leading dimension to the minimum and transpose the matrix if needed +/// The outputted matrix always uses row major layout +template +auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, std::size_t ld, + oneapi::mkl::transpose transpose_val, + oneapi::mkl::layout dense_matrix_layout) { + const bool is_row_major = dense_matrix_layout == oneapi::mkl::layout::row_major; + const bool is_transposed = transpose_val != oneapi::mkl::transpose::nontrans; + const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans; + const bool swap_ld = is_row_major != is_transposed; + if (swap_ld && ncols > ld) { + throw 
std::runtime_error("Expected ncols <= ld"); + } + if (!swap_ld && nrows > ld) { + throw std::runtime_error("Expected nrows <= ld"); + } + + // Copy with a default leading dimension and transpose if needed + std::vector opx(nrows * ncols); + for (std::size_t i = 0; i < nrows; ++i) { + for (std::size_t j = 0; j < ncols; ++j) { + auto val = swap_ld ? x[i * ld + j] : x[j * ld + i]; + opx[i * ncols + j] = opVal(val, apply_conjugate); + } + } + return opx; +} + +/// Convert the sparse matrix in the given format to a dense matrix A in row major layout applied with A_view. +template +std::vector sparse_to_dense(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, std::size_t a_nrows, + std::size_t a_ncols, std::size_t nnz, intType indexing, + oneapi::mkl::transpose transpose_val, + oneapi::mkl::sparse::matrix_view A_view) { + oneapi::mkl::sparse::matrix_descr type_view = A_view.type_view; + oneapi::mkl::uplo uplo_val = A_view.uplo_view; + const bool is_symmetric_or_hermitian_view = + type_view == oneapi::mkl::sparse::matrix_descr::symmetric || + type_view == oneapi::mkl::sparse::matrix_descr::hermitian; + const bool apply_conjugate = transpose_val == oneapi::mkl::transpose::conjtrans; + std::vector dense_a(a_nrows * a_ncols, fpType(0)); + + auto write_to_dense_if_needed = [&](std::size_t a_idx, std::size_t row, std::size_t col) { + if ((type_view == oneapi::mkl::sparse::matrix_descr::triangular || + is_symmetric_or_hermitian_view) && + ((uplo_val == oneapi::mkl::uplo::lower && col > row) || + (uplo_val == oneapi::mkl::uplo::upper && col < row))) { + // Read only the upper or lower part of the sparse matrix + return; + } + if (type_view == oneapi::mkl::sparse::matrix_descr::diagonal && col != row) { + // Read only the diagonal + return; + } + // Do not transpose symmetric matrices to simplify the propagation of the symmetric values + std::size_t dense_a_idx = + (!is_symmetric_or_hermitian_view && transpose_val != 
oneapi::mkl::transpose::nontrans) + ? col * a_nrows + row + : row * a_ncols + col; + fpType val = opVal(a[a_idx], apply_conjugate); + dense_a[dense_a_idx] = val; + }; + + if (format == sparse_matrix_format_t::CSR) { + for (std::size_t row = 0; row < a_nrows; row++) { + for (intType i = ia[row] - indexing; i < ia[row + 1] - indexing; i++) { + std::size_t iu = static_cast(i); + std::size_t col = static_cast(ja[iu] - indexing); + write_to_dense_if_needed(iu, row, col); + } + } + } + else if (format == sparse_matrix_format_t::COO) { + for (std::size_t i = 0; i < nnz; i++) { + std::size_t row = static_cast(ia[i] - indexing); + std::size_t col = static_cast(ja[i] - indexing); + write_to_dense_if_needed(i, row, col); + } + } + + // Write unit diagonal + if (A_view.diag_view == oneapi::mkl::diag::unit && a_nrows == a_ncols) { + for (std::size_t i = 0; i < a_nrows; i++) { + dense_a[i * a_nrows + i] = fpType(1); + } + } + + // Propagate the rest of the symmetric matrix + if (is_symmetric_or_hermitian_view) { + for (std::size_t i = 0; i < a_nrows; ++i) { + for (std::size_t j = i + 1; j < a_ncols; ++j) { + if (uplo_val == oneapi::mkl::uplo::lower) { + dense_a[i * a_ncols + j] = dense_a[j * a_nrows + i]; + } + else { + dense_a[j * a_nrows + i] = dense_a[i * a_ncols + j]; + } + } + } + } + return dense_a; +} + +#endif // _COMMON_SPARSE_REFERENCE_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/sparse_reference.hpp deleted file mode 100644 index ffb876f11..000000000 --- a/tests/unit_tests/sparse_blas/include/sparse_reference.hpp +++ /dev/null @@ -1,297 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#ifndef _SPARSE_REFERENCE_HPP__ -#define _SPARSE_REFERENCE_HPP__ - -#include -#include -#include - -#include "oneapi/mkl.hpp" - -#include "test_common.hpp" - -template -inline T conjugate(T) { - static_assert(false, "Unsupported type"); -} -template <> -inline float conjugate(float t) { - return t; -} -template <> -inline double conjugate(double t) { - return t; -} -template <> -inline std::complex conjugate(std::complex t) { - return std::conj(t); -} -template <> -inline std::complex conjugate(std::complex t) { - return std::conj(t); -} - -template -inline T opVal(const T t, const bool isConj) { - return (isConj ? 
conjugate(t) : t); -}; - -template -void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, - intType a_nrows, intType a_ncols, intType a_ind, accIntType &ia, - accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { - const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); - - // initialize ia_t to zero - for (intType i = 0; i < a_ncols + 1; ++i) { - ia_t[i] = 0; - } - - // fill ia_t with counts of columns - for (intType i = 0; i < a_nrows; ++i) { - const intType st = ia[i] - a_ind; - const intType en = ia[i + 1] - a_ind; - for (intType j = st; j < en; ++j) { - const intType col = ja[j] - a_ind; - ia_t[col + 1]++; - } - } - // prefix sum to get official ia_t counts - ia_t[0] = a_ind; - for (intType i = 0; i < a_ncols; ++i) { - ia_t[i + 1] += ia_t[i]; - } - - // second pass through data to fill transpose structure - for (intType i = 0; i < a_nrows; ++i) { - const intType st = ia[i] - a_ind; - const intType en = ia[i + 1] - a_ind; - for (intType j = st; j < en; ++j) { - const intType col = ja[j] - a_ind; - const intType j_in_a_t = ia_t[col] - a_ind; - ia_t[col]++; - ja_t[j_in_a_t] = i + a_ind; - if (!structOnlyFlag) { - const fpType val = a[j]; - a_t[j_in_a_t] = opVal(val, isConj); - } - } - } - - // adjust ia_t back to original state after filling structure - for (intType i = a_ncols; i > 0; --i) { - ia_t[i] = ia_t[i - 1]; - } - ia_t[0] = a_ind; -} - -// Transpose the given sparse matrix if needed -template -auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, std::size_t nnz, intType a_ind, - oneapi::mkl::transpose transpose_val) { - std::vector iopa; - std::vector jopa; - std::vector opa; - if (transpose_val == oneapi::mkl::transpose::nontrans) { - iopa.assign(ia, ia + a_nrows + 1); - jopa.assign(ja, ja + nnz); - opa.assign(a, a + nnz); - } - else if (transpose_val == oneapi::mkl::transpose::trans || - transpose_val == 
oneapi::mkl::transpose::conjtrans) { - iopa.resize(static_cast(a_ncols + 1)); - jopa.resize(nnz); - opa.resize(nnz); - do_csr_transpose(transpose_val, iopa.data(), jopa.data(), opa.data(), a_nrows, a_ncols, - a_ind, ia, ja, a); - } - else { - throw std::runtime_error("unsupported transpose_val=" + - std::to_string(static_cast(transpose_val))); - } - return std::make_tuple(iopa, jopa, opa); -} - -template -auto dense_transpose_if_needed(const fpType *x, std::size_t outer_size, std::size_t inner_size, - std::size_t ld, oneapi::mkl::transpose transpose_val) { - std::vector opx; - if (transpose_val == oneapi::mkl::transpose::nontrans) { - opx.assign(x, x + outer_size * ld); - } - else { - opx.resize(outer_size * ld); - for (std::size_t i = 0; i < outer_size; ++i) { - for (std::size_t j = 0; j < inner_size; ++j) { - opx[i + j * ld] = x[i * ld + j]; - } - } - } - return opx; -} - -/// Return the dense matrix A in row major layout. -/// Diagonal values are overwritten with 1s if diag_val is unit. -template -std::vector sparse_to_dense(const intType *ia, const intType *ja, const fpType *a, - std::size_t a_nrows, std::size_t a_ncols, intType a_ind, - oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val) { - std::vector dense_a(a_nrows * a_ncols, fpType(0)); - for (std::size_t row = 0; row < a_nrows; row++) { - for (intType i = ia[row] - a_ind; i < ia[row + 1] - a_ind; i++) { - std::size_t iu = static_cast(i); - std::size_t col = static_cast(ja[iu] - a_ind); - std::size_t dense_a_idx = transpose_val != oneapi::mkl::transpose::nontrans - ? 
col * a_nrows + row - : row * a_ncols + col; - fpType val = a[iu]; - if constexpr (complex_info::is_complex) { - if (transpose_val == oneapi::mkl::transpose::conjtrans) { - val = std::conj(val); - } - } - dense_a[dense_a_idx] = val; - } - } - if (diag_val == oneapi::mkl::diag::unit) { - for (std::size_t i = 0; i < a_nrows; ++i) { - dense_a[i * a_ncols + i] = set_fp_value()(1.f, 0.f); - } - } - return dense_a; -} - -template -void prepare_reference_gemv_data(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, intType a_nnz, intType a_ind, - oneapi::mkl::transpose opA, fpType alpha, fpType beta, - const fpType *x, fpType *y_ref) { - std::size_t opa_nrows = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols); - const std::size_t nnz = static_cast(a_nnz); - auto [iopa, jopa, opa] = - sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA); - - // - // do GEMV operation - // - // y_ref <- alpha * op(A) * x + beta * y_ref - // - for (std::size_t row = 0; row < opa_nrows; row++) { - fpType tmp = 0; - for (intType i = iopa[row] - a_ind; i < iopa[row + 1] - a_ind; i++) { - std::size_t iu = static_cast(i); - std::size_t x_ind = static_cast(jopa[iu] - a_ind); - tmp += opa[iu] * x[x_ind]; - } - - y_ref[row] = alpha * tmp + beta * y_ref[row]; - } -} - -template -void prepare_reference_gemm_data(const intType *ia, const intType *ja, const fpType *a, - intType a_nrows, intType a_ncols, intType c_ncols, intType a_nnz, - intType a_ind, oneapi::mkl::layout dense_matrix_layout, - oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - fpType alpha, fpType beta, intType ldb, intType ldc, - const fpType *b, fpType *c_ref) { - std::size_t opa_nrows = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? a_nrows : a_ncols); - std::size_t opa_ncols = - static_cast((opA == oneapi::mkl::transpose::nontrans) ? 
a_ncols : a_nrows); - const std::size_t nnz = static_cast(a_nnz); - const std::size_t ldb_u = static_cast(ldb); - const std::size_t ldc_u = static_cast(ldc); - auto [iopa, jopa, opa] = - sparse_transpose_if_needed(ia, ja, a, a_nrows, a_ncols, nnz, a_ind, opA); - - std::size_t b_outer_size = static_cast(opa_ncols); - std::size_t b_inner_size = static_cast(c_ncols); - if (dense_matrix_layout == oneapi::mkl::layout::col_major) { - std::swap(b_outer_size, b_inner_size); - } - auto opb = dense_transpose_if_needed(b, b_outer_size, b_inner_size, ldb_u, opB); - - // - // do GEMM operation - // - // C <- alpha * opA(A) * opB(B) + beta * C - // - if (dense_matrix_layout == oneapi::mkl::layout::row_major) { - for (std::size_t row = 0; row < opa_nrows; row++) { - for (std::size_t col = 0; col < static_cast(c_ncols); col++) { - fpType tmp = 0; - for (std::size_t i = static_cast(iopa[row] - a_ind); - i < static_cast(iopa[row + 1] - a_ind); i++) { - tmp += opa[i] * opb[static_cast(jopa[i] - a_ind) * ldb_u + col]; - } - fpType &c = c_ref[row * ldc_u + col]; - c = alpha * tmp + beta * c; - } - } - } - else { - for (std::size_t col = 0; col < static_cast(c_ncols); col++) { - for (std::size_t row = 0; row < opa_nrows; row++) { - fpType tmp = 0; - for (std::size_t i = static_cast(iopa[row] - a_ind); - i < static_cast(iopa[row + 1] - a_ind); i++) { - tmp += opa[i] * opb[static_cast(jopa[i] - a_ind) + col * ldb_u]; - } - fpType &c = c_ref[row + col * ldc_u]; - c = alpha * tmp + beta * c; - } - } - } -} - -template -void prepare_reference_trsv_data(const intType *ia, const intType *ja, const fpType *a, intType m, - intType a_ind, oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose opA, oneapi::mkl::diag diag_val, - const fpType *x, fpType *y_ref) { - std::size_t mu = static_cast(m); - auto dense_a = sparse_to_dense(ia, ja, a, mu, mu, a_ind, opA, diag_val); - - // - // do TRSV operation - // - // y_ref <- op(A)^-1 * x - // - // Compute each element of the reference one after the other 
starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix. - // A matrix is considered lowered if it is lower and not transposed or upper and transposed. - const bool is_lower = - (uplo_val == oneapi::mkl::uplo::lower) == (opA == oneapi::mkl::transpose::nontrans); - for (std::size_t row = 0; row < mu; row++) { - std::size_t uplo_row = is_lower ? row : (mu - 1 - row); - fpType rhs = x[uplo_row]; - for (std::size_t col = 0; col < row; col++) { - std::size_t uplo_col = is_lower ? col : (mu - 1 - col); - rhs -= dense_a[uplo_row * mu + uplo_col] * y_ref[uplo_col]; - } - y_ref[uplo_row] = rhs / dense_a[uplo_row * mu + uplo_row]; - } -} - -#endif // _SPARSE_REFERENCE_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index fd1e91a47..c11255a9a 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -1,5 +1,5 @@ /******************************************************************************* -* Copyright 2023 Intel Corporation +* Copyright 2024 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include #include +#include #if __has_include() #include @@ -38,7 +39,7 @@ #ifdef CALL_RT_API #define CALL_RT_OR_CT(FUNC, QUEUE, ...) FUNC(QUEUE, __VA_ARGS__) #else -#define CALL_RT_OR_CT(FUNC, QUEUE, ...) TEST_RUN_CT_SELECT(QUEUE, FUNC, __VA_ARGS__); +#define CALL_RT_OR_CT(FUNC, QUEUE, ...) 
TEST_RUN_SPARSE_CT_SELECT(QUEUE, FUNC, __VA_ARGS__); #endif template @@ -53,6 +54,18 @@ struct complex_info> { static const bool is_complex = true; }; +enum sparse_matrix_format_t { + CSR, + COO, +}; + +static std::vector> test_matrix_properties{ + { oneapi::mkl::sparse::matrix_property::sorted }, + { oneapi::mkl::sparse::matrix_property::symmetric }, + { oneapi::mkl::sparse::matrix_property::sorted, + oneapi::mkl::sparse::matrix_property::symmetric } +}; + void print_error_code(sycl::exception const &e); // Catch asynchronous exceptions. @@ -70,17 +83,18 @@ struct exception_handler_t { } }; +struct UsmDeleter { + sycl::queue q; + UsmDeleter(sycl::queue _q) : q(_q) {} + void operator()(void *ptr) { + sycl::free(ptr, q); + } +}; + // Use a unique_ptr to automatically free device memory on unique_ptr destruction. template auto malloc_device_uptr(sycl::queue q, std::size_t num_elts) { - struct Deleter { - sycl::queue q; - Deleter(sycl::queue _q) : q(_q) {} - void operator()(T *ptr) { - sycl::free(ptr, q); - } - }; - return std::unique_ptr(sycl::malloc_device(num_elts, q), Deleter(q)); + return std::unique_ptr(sycl::malloc_device(num_elts, q), UsmDeleter(q)); } // SYCL buffer creation helper. 
@@ -90,6 +104,40 @@ sycl::buffer make_buffer(const vec &v) { return buf; } +template +void copy_host_to_buffer(sycl::queue queue, const std::vector &src, sycl::buffer dst) { + queue.submit([&](sycl::handler &cgh) { + auto dst_acc = dst.template get_access( + cgh, sycl::range<1>(src.size())); + cgh.copy(src.data(), dst_acc); + }); +} + +template +std::pair swap_if_cond(bool swap, XT x, YT y) { + if (swap) { + return { static_cast(y), static_cast(x) }; + } + else { + return { static_cast(x), static_cast(y) }; + } +} + +template +auto swap_if_cond(bool swap, T x, T y) { + return swap_if_cond(swap, x, y); +} + +template +auto swap_if_transposed(oneapi::mkl::transpose op, XT x, YT y) { + return swap_if_cond(op != oneapi::mkl::transpose::nontrans, x, y); +} + +template +auto swap_if_transposed(oneapi::mkl::transpose op, T x, T y) { + return swap_if_transposed(op, x, y); +} + template struct set_fp_value { inline fpType operator()(fpType real, fpType /*imag*/) { @@ -131,12 +179,14 @@ void rand_vector(std::vector &v, std::size_t n) { template void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::size_t nrows, - std::size_t ncols, std::size_t ld) { + std::size_t ncols, std::size_t ld, + oneapi::mkl::transpose transpose_val = oneapi::mkl::transpose::nontrans) { using fpRealType = typename complex_info::real_type; - std::size_t outer_size = nrows; - std::size_t inner_size = ncols; - if (layout_val == oneapi::mkl::layout::col_major) { - std::swap(outer_size, inner_size); + auto [op_nrows, op_cols] = swap_if_transposed(transpose_val, nrows, ncols); + auto [outer_size, inner_size] = + swap_if_cond(layout_val == oneapi::mkl::layout::row_major, op_cols, op_nrows); + if (inner_size > ld) { + throw std::runtime_error("Expected inner_size <= ld"); } m.resize(outer_size * ld); rand_scalar rand; @@ -151,69 +201,258 @@ void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::si } } -// Creating the 3arrays CSR representation (ia, ja, values) -// of general 
random sparse matrix -// with density (0 < density <= 1.0) -// -0.5 <= value < 0.5 -// require_diagonal means all diagonal entries guaranteed to be nonzero +/// Generate random value in the range [-0.5, 0.5] +/// The amplitude is guaranteed to be >= 0.1 if is_diag is true +template +fpType generate_data(bool is_diag) { + rand_scalar rand_data; + if (is_diag) { + // Guarantee an amplitude >= 0.1 + fpType sign = (std::rand() % 2) * 2 - 1; + return rand_data(0.1, 0.5) * sign; + } + return rand_data(-0.5, 0.5); +} + +/// Populate the 3 arrays of a random sparse matrix in CSR representation (ia, ja, values) +/// with the given density in range [0, 1] and values in range [-0.5, 0.5]. +/// ja is sorted. +/// require_diagonal means all diagonal entries guaranteed to be nonzero. template -intType generate_random_matrix(const intType nrows, const intType ncols, const double density_val, - intType indexing, std::vector &ia, std::vector &ja, - std::vector &a, bool require_diagonal = false) { +intType generate_random_csr_matrix(const intType nrows, const intType ncols, + const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { intType nnz = 0; rand_scalar rand_density; - rand_scalar rand_data; ia.push_back(indexing); // starting index of row0. for (intType i = 0; i < nrows; i++) { - ia.push_back(nnz + indexing); // ending index of row_i. 
- for (intType j = 0; j < ncols; j++) { - const bool is_diag = require_diagonal && i == j; - if (is_diag || (rand_density(0.0, 1.0) <= density_val)) { - fpType val; - if (is_diag) { - // Guarantee an amplitude >= 0.1 - fpType sign = (std::rand() % 2) * 2 - 1; - val = rand_data(0.1, 0.5) * sign; + if (is_symmetric) { + // Fill the lower triangular part based on the previously filled upper triangle + // This ensures that the ja indices are always sorted + for (intType j = 0; j < i; ++j) { + // Check if element at row j and column i has been added, assuming ja is sorted + intType row_offset_j = ia[static_cast(j)]; + intType num_elts_row_j = ia.at(static_cast(j) + 1) - row_offset_j; + intType ja_idx = 0; + while (ja_idx < num_elts_row_j && + ja[static_cast(row_offset_j + ja_idx)] < i) { + ++ja_idx; } - else { - val = rand_data(-0.5, 0.5); + auto symmetric_idx = static_cast(row_offset_j + ja_idx); + if (ja_idx < num_elts_row_j && ja[symmetric_idx] == i) { + a.push_back(a[symmetric_idx]); + ja.push_back(j + indexing); + nnz++; } - a.push_back(val); + } + } + // Loop through the upper triangular to fill a symmetric matrix + const intType j_start = is_symmetric ? i : 0; + for (intType j = j_start; j < ncols; j++) { + const bool is_diag = require_diagonal && i == j; + const bool force_last_nnz = nnz == 0 && i == nrows - 1 && j == ncols - 1; + if (force_last_nnz || is_diag || (rand_density(0.0, 1.0) <= density_val)) { + a.push_back(generate_data(is_diag)); ja.push_back(j + indexing); nnz++; } } - ia[static_cast(i) + 1] = nnz + indexing; + ia.push_back(nnz + indexing); // ending index of row_i } return nnz; } -// Shuffle the 3arrays CSR representation (ia, ja, values) -// of any sparse matrix and set values serially from 0..nnz. -// Intended for use with sorting. +/// Populate the 3 arrays of a random sparse matrix in COO representation (ia, ja, values) +/// with the given density in range [0, 1] and values in range [-0.5, 0.5]. 
+/// Indices are sorted by row (ia) then by column (ja). +/// require_diagonal means all diagonal entries guaranteed to be nonzero. template -void shuffle_data(const intType *ia, intType *ja, fpType *a, const std::size_t nrows) { - // - // shuffle indices according to random seed - // - intType indexing = ia[0]; - for (std::size_t i = 0; i < nrows; ++i) { - intType nnz_row = ia[i + 1] - ia[i]; - for (intType j = ia[i] - indexing; j < ia[i + 1] - indexing; ++j) { - intType q = ia[i] - indexing + std::rand() % (nnz_row); - // swap element i and q - std::swap(ja[q], ja[j]); - std::swap(a[q], a[j]); +intType generate_random_coo_matrix(const intType nrows, const intType ncols, + const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { + rand_scalar rand_density; + + for (intType i = 0; i < nrows; i++) { + if (is_symmetric) { + // Fill the lower triangular part based on the previously filled upper triangle + // This ensures that the ja indices are always sorted + for (intType j = 0; j < i; ++j) { + // Check if element at row j and column i has been added, assuming ia and ja are sorted + std::size_t idx = 0; + while (idx < ia.size() && ia[idx] - indexing <= j && ja[idx] - indexing < i) { + ++idx; + } + if (idx < ia.size() && ia[idx] - indexing == j && ja[idx] - indexing == i) { + a.push_back(a[idx]); + ia.push_back(i + indexing); + ja.push_back(j + indexing); + } + } } + // Loop through the upper triangular to fill a symmetric matrix + const intType j_start = is_symmetric ? 
i : 0; + for (intType j = j_start; j < ncols; j++) { + const bool is_diag = require_diagonal && i == j; + const bool force_last_nnz = a.size() == 0 && i == nrows - 1 && j == ncols - 1; + if (force_last_nnz || is_diag || (rand_density(0.0, 1.0) <= density_val)) { + a.push_back(generate_data(is_diag)); + ia.push_back(i + indexing); + ja.push_back(j + indexing); + } + } + } + return static_cast(a.size()); +} + +// Populate the 3 arrays of a random sparse matrix in CSR or COO representation +// with the given density in range [0, 1] and values in range [-0.5, 0.5]. +// require_diagonal means all diagonal entries guaranteed to be nonzero +template +intType generate_random_matrix(sparse_matrix_format_t format, const intType nrows, + const intType ncols, const double density_val, intType indexing, + std::vector &ia, std::vector &ja, + std::vector &a, bool is_symmetric, + bool require_diagonal = false) { + ia.clear(); + ja.clear(); + a.clear(); + if (format == sparse_matrix_format_t::CSR) { + return generate_random_csr_matrix(nrows, ncols, density_val, indexing, ia, ja, a, + is_symmetric, require_diagonal); + } + else if (format == sparse_matrix_format_t::COO) { + return generate_random_coo_matrix(nrows, ncols, density_val, indexing, ia, ja, a, + is_symmetric, require_diagonal); } + throw std::runtime_error("Unsupported sparse format"); +} + +/// Shuffle the 3arrays CSR or COO representation (ia, ja, values) +/// of any sparse matrix. +/// In CSR format, the elements within a row are shuffled without changing ia. +/// In COO format, all the elements are shuffled. 
+template +void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType *ia, + intType *ja, fpType *a, intType nnz, std::size_t nrows) { + if (format == sparse_matrix_format_t::CSR) { + for (std::size_t i = 0; i < nrows; ++i) { + intType nnz_row = ia[i + 1] - ia[i]; + for (intType j = ia[i] - indexing; j < ia[i + 1] - indexing; ++j) { + intType q = ia[i] - indexing + std::rand() % nnz_row; + // Swap elements j and q + std::swap(ja[q], ja[j]); + std::swap(a[q], a[j]); + } + } + } + else if (format == sparse_matrix_format_t::COO) { + for (std::size_t i = 0; i < static_cast(nnz); ++i) { + intType q = std::rand() % nnz; + // Swap elements i and q + std::swap(ia[q], ia[i]); + std::swap(ja[q], ja[i]); + std::swap(a[q], a[i]); + } + } + else { + throw oneapi::mkl::exception("sparse_blas", "shuffle_sparse_matrix", + "Internal error: unsupported format"); + } +} + +/// Initialize a sparse matrix specified by the given format +template +void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, + oneapi::mkl::sparse::matrix_handle_t *p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, + ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { + if (format == sparse_matrix_format_t::CSR) { + CALL_RT_OR_CT(oneapi::mkl::sparse::init_csr_matrix, queue, p_smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else if (format == sparse_matrix_format_t::COO) { + CALL_RT_OR_CT(oneapi::mkl::sparse::init_coo_matrix, queue, p_smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else { + throw oneapi::mkl::exception("sparse_blas", "init_sparse_matrix", + "Internal error: unsupported format"); + } +} + +/// Reset the data of a sparse matrix specified by the given format +template +void set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, + oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, + std::int64_t num_cols, 
std::int64_t nnz, oneapi::mkl::index_base index, + ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { + if (format == sparse_matrix_format_t::CSR) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_matrix_data, queue, smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else if (format == sparse_matrix_format_t::COO) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_coo_matrix_data, queue, smhandle, num_rows, num_cols, + nnz, index, rows, cols, vals); + } + else { + throw oneapi::mkl::exception("sparse_blas", "set_matrix_data", + "Internal error: unsupported format"); + } +} + +template +inline void free_handles(sycl::queue &queue, const std::vector dependencies, + HandlesT &&... handles) { + // Fold expression so that handles expands to each value one after the other. + ( + [&] { + if (!handles) { + return; + } + sycl::event event; + if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_vector, queue, handles, + dependencies); + } + else if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_dense_matrix, queue, handles, + dependencies); + } + else if constexpr (std::is_same_v) { + CALL_RT_OR_CT(event = oneapi::mkl::sparse::release_sparse_matrix, queue, handles, + dependencies); + } + event.wait(); + }(), + ...); +} + +template +inline void free_handles(sycl::queue &queue, HandlesT &&... handles) { + free_handles(queue, {}, handles...); +} + +template +inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&... 
handles) { + queue.wait(); + free_handles(queue, handles...); } -inline void wait_and_free(sycl::queue &main_queue, oneapi::mkl::sparse::matrix_handle_t *p_handle) { - main_queue.wait(); - sycl::event ev_release; - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, p_handle); - ev_release.wait(); +inline bool require_square_matrix( + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties) { + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + return A_view.type_view != oneapi::mkl::sparse::matrix_descr::general || is_symmetric; } template diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp new file mode 100644 index 000000000..6188d4268 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -0,0 +1,309 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPMM_HPP__ +#define _TEST_SPMM_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. + * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param transpose_A Transpose value for the A matrix + * @param transpose_B Transpose value for the B matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + * + * The test functions will use different sizes and leading dimensions if the configuration implies a symmetric matrix. 
+ */ +template +void test_helper_with_format_with_transpose( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, int &num_passed, + int &num_skipped) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; + oneapi::mkl::sparse::spmm_alg default_alg = oneapi::mkl::sparse::spmm_alg::default_alg; + oneapi::mkl::sparse::matrix_view default_A_view; + std::set no_properties; + bool no_reset_data = false; + bool no_scalars_on_device = false; + + { + int m = 4, k = 6, n = 5; + int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m; + int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k; + int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? n : k; + int ncols_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? 
k : n; + int nrows_C = m; + int ncols_C = n; + int ldb = nrows_B; + int ldc = nrows_C; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, true, + no_scalars_on_device), + num_passed, num_skipped); + // Test alpha and beta on the device + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + oneapi::mkl::index_base::one, col_major, transpose_A, transpose_B, + fp_one, fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, set_fp_value()(2.f, 1.5f), + fp_zero, ldb, ldc, default_alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test non-default beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, + set_fp_value()(3.2f, 1.f), ldb, ldc, default_alg, + default_A_view, no_properties, 
no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test 0 alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_one, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test 0 alpha and beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_zero, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test non-default ldb + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb + 5, ldc, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test non-default ldc + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc + 6, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test row major layout + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, + fp_zero, ncols_B, ncols_C, default_alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test int64 indices + long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; + auto [long_ldc, long_ldb] = swap_if_transposed(transpose_A, long_nrows_A, long_ncols_A); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, long_nrows_A, long_ncols_A, 
long_ncols_C, + density_A_matrix, index_zero, col_major, transpose_A, transpose_B, + fp_one, fp_zero, long_ldb, long_ldc, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test other algorithms + for (auto alg : non_default_algorithms) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, + ldb, ldc, alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + } + // Test matrix properties + for (auto properties : test_matrix_properties) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, + index_zero, col_major, transpose_A, transpose_B, fp_one, fp_zero, + ldb, ldc, default_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + } + } + { + // Test different sizes + int m = 6, k = 2, n = 5; + int nrows_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? k : m; + int ncols_A = (transpose_A != oneapi::mkl::transpose::nontrans) ? m : k; + int nrows_B = (transpose_B != oneapi::mkl::transpose::nontrans) ? n : k; + int nrows_C = m; + int ncols_C = n; + int ldb = nrows_B; + int ldc = nrows_C; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, + col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, + default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + } +} + +/** + * Helper function to test combination of transpose vals. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, int &num_passed, + int &num_skipped) { + std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::trans, + oneapi::mkl::transpose::conjtrans }; + for (auto transpose_A : transpose_vals) { + for (auto transpose_B : transpose_vals) { + test_helper_with_format_with_transpose( + test_functor_i32, test_functor_i64, dev, format, non_default_algorithms, + transpose_A, transpose_B, num_passed, num_skipped); + } + } +} + +/** + * Helper function to test multiple sparse matrix format and choose valid algorithms. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, int &num_passed, int &num_skipped) { + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, + { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::csr_alg1, + oneapi::mkl::sparse::spmm_alg::csr_alg2, oneapi::mkl::sparse::spmm_alg::csr_alg3 }, + num_passed, num_skipped); + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO, + { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::coo_alg1, + oneapi::mkl::sparse::spmm_alg::coo_alg2, oneapi::mkl::sparse::spmm_alg::coo_alg3, + oneapi::mkl::sparse::spmm_alg::coo_alg4 }, + num_passed, num_skipped); +} + +/// Compute spmm reference as a dense operation +template +void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType a_nrows, + intType a_ncols, intType c_ncols, intType a_nnz, intType indexing, + oneapi::mkl::layout dense_matrix_layout, + oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + fpType alpha, fpType beta, intType ldb, intType ldc, + const fpType *b, oneapi::mkl::sparse::matrix_view A_view, + fpType *c_ref) { + std::size_t a_nrows_u = static_cast(a_nrows); + std::size_t a_ncols_u = static_cast(a_ncols); + std::size_t c_ncols_u = static_cast(c_ncols); + auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); + const std::size_t nnz = static_cast(a_nnz); + const std::size_t ldb_u = static_cast(ldb); + 
const std::size_t ldc_u = static_cast(ldc); + // dense_opa is always row major + auto dense_opa = + sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); + + // dense_opb is always row major and not transposed + auto dense_opb = extract_dense_matrix(b, opa_ncols, c_ncols_u, ldb_u, opB, dense_matrix_layout); + + // Return the linear index to access a dense matrix from + auto dense_linear_idx = [=](std::size_t row, std::size_t col, std::size_t ld) { + return (dense_matrix_layout == oneapi::mkl::layout::row_major) ? row * ld + col + : col * ld + row; + }; + + // + // do SPMM operation + // + // C <- alpha * opA(A) * opB(B) + beta * C + // + for (std::size_t row = 0; row < opa_nrows; row++) { + for (std::size_t col = 0; col < c_ncols_u; col++) { + fpType acc = 0; + for (std::size_t i = 0; i < opa_ncols; i++) { + acc += dense_opa[row * opa_ncols + i] * dense_opb[i * c_ncols_u + col]; + } + fpType &c = c_ref[dense_linear_idx(row, col, ldc_u)]; + c = alpha * acc + beta * c; + } + } +} + +#endif // _TEST_SPMM_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp new file mode 100644 index 000000000..f141db893 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -0,0 +1,283 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPMV_HPP__ +#define _TEST_SPMV_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. + * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + * + * The test functions will use different sizes if the configuration implies a symmetric matrix. 
+ */ +template +void test_helper_with_format_with_transpose( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, + oneapi::mkl::transpose transpose_val, int &num_passed, int &num_skipped) { + double density_A_matrix = 0.8; + fpType fp_zero = set_fp_value()(0.f, 0.f); + fpType fp_one = set_fp_value()(1.f, 0.f); + int nrows_A = 4, ncols_A = 6; + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::sparse::spmv_alg default_alg = oneapi::mkl::sparse::spmv_alg::default_alg; + oneapi::mkl::sparse::matrix_view default_A_view; + std::set no_properties; + bool no_reset_data = false; + bool no_scalars_on_device = false; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, true, + no_scalars_on_device), + num_passed, num_skipped); + // Test alpha and beta on the device + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, + oneapi::mkl::index_base::one, transpose_val, fp_one, fp_zero, default_alg, + default_A_view, no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, 
index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), fp_zero, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test non-default beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, set_fp_value()(3.2f, 1.f), default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test 0 alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_zero, fp_one, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test 0 alpha and beta + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_zero, fp_zero, default_alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test int64 indices + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i64(dev, format, 27L, 13L, density_A_matrix, index_zero, transpose_val, fp_one, + fp_zero, default_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Lower triangular + oneapi::mkl::sparse::matrix_view triangular_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Upper triangular + triangular_A_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_A_view, no_properties, + no_reset_data, 
no_scalars_on_device), + num_passed, num_skipped); + // Lower triangular unit diagonal + oneapi::mkl::sparse::matrix_view triangular_unit_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Upper triangular unit diagonal + triangular_unit_A_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, triangular_unit_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Lower symmetric + oneapi::mkl::sparse::matrix_view symmetric_view(oneapi::mkl::sparse::matrix_descr::symmetric); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Upper symmetric + symmetric_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, symmetric_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Lower hermitian + oneapi::mkl::sparse::matrix_view hermitian_view(oneapi::mkl::sparse::matrix_descr::hermitian); + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, hermitian_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Upper hermitian + 
hermitian_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, transpose_val, + fp_one, fp_zero, default_alg, hermitian_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test other algorithms + for (auto alg : non_default_algorithms) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + } + // Test matrix properties + for (auto properties : test_matrix_properties) { + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, nrows_A, ncols_A, density_A_matrix, index_zero, + transpose_val, fp_one, fp_zero, default_alg, default_A_view, + properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + } +} + +/** + * Helper function to test combination of transpose vals. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param format Sparse matrix format to use + * @param non_default_algorithms Algorithms compatible with the given format, other than default_alg + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format( + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + sparse_matrix_format_t format, + const std::vector &non_default_algorithms, int &num_passed, + int &num_skipped) { + std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, + oneapi::mkl::transpose::trans, + oneapi::mkl::transpose::conjtrans }; + for (auto transpose_A : transpose_vals) { + test_helper_with_format_with_transpose(test_functor_i32, test_functor_i64, dev, + format, non_default_algorithms, transpose_A, + num_passed, num_skipped); + } +} + +/** + * Helper function to test multiple sparse matrix format and choose valid algorithms. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, int &num_passed, int &num_skipped) { + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, + { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::csr_alg1, + oneapi::mkl::sparse::spmv_alg::csr_alg2, oneapi::mkl::sparse::spmv_alg::csr_alg3 }, + num_passed, num_skipped); + test_helper_with_format( + test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::COO, + { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::coo_alg1, + oneapi::mkl::sparse::spmv_alg::coo_alg2 }, + num_passed, num_skipped); +} + +/// Compute spmv reference as a dense operation +template +void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType a_nrows, + intType a_ncols, intType a_nnz, intType indexing, + oneapi::mkl::transpose opA, fpType alpha, fpType beta, + const fpType *x, oneapi::mkl::sparse::matrix_view A_view, + fpType *y_ref) { + std::size_t a_nrows_u = static_cast(a_nrows); + std::size_t a_ncols_u = static_cast(a_ncols); + auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); + const std::size_t nnz = static_cast(a_nnz); + auto dense_opa = + sparse_to_dense(format, ia, ja, a, a_nrows_u, a_ncols_u, nnz, indexing, opA, A_view); + + // + // do SPMV operation + // + // y_ref <- alpha * op(A) * x + beta * y_ref + // + for (std::size_t row = 0; row < opa_nrows; row++) { + fpType acc = 0; + for (std::size_t col = 0; col 
< opa_ncols; col++) { + acc += dense_opa[row * opa_ncols + col] * x[col]; + } + y_ref[row] = alpha * acc + beta * y_ref[row]; + } +} + +#endif // _TEST_SPMV_HPP__ diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp b/tests/unit_tests/sparse_blas/include/test_spsv.hpp new file mode 100644 index 000000000..bdf9210f8 --- /dev/null +++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp @@ -0,0 +1,202 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _TEST_SPSV_HPP__ +#define _TEST_SPSV_HPP__ + +#if __has_include() +#include +#else +#include +#endif + +#include "oneapi/mkl.hpp" +#include "oneapi/mkl/detail/config.hpp" + +#include "common_sparse_reference.hpp" +#include "test_common.hpp" +#include "test_helper.hpp" + +#include + +/** + * Helper function to run tests in different configuration. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, sparse_matrix_format_t format, + oneapi::mkl::transpose transpose_val, int &num_passed, + int &num_skipped) { + double density_A_matrix = 0.144; + fpType alpha = set_fp_value()(1.f, 0.f); + int m = 277; + oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; + oneapi::mkl::sparse::spsv_alg default_alg = oneapi::mkl::sparse::spsv_alg::default_alg; + oneapi::mkl::sparse::spsv_alg no_optimize_alg = oneapi::mkl::sparse::spsv_alg::no_optimize_alg; + oneapi::mkl::sparse::matrix_view default_A_view(oneapi::mkl::sparse::matrix_descr::triangular); + oneapi::mkl::sparse::matrix_view upper_A_view(oneapi::mkl::sparse::matrix_descr::triangular); + upper_A_view.uplo_view = oneapi::mkl::uplo::upper; + std::set no_properties; + bool no_reset_data = false; + bool no_scalars_on_device = false; + + // Basic test + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Reset data + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, no_properties, true, no_scalars_on_device), + num_passed, num_skipped); + // Test alpha on the device + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + 
default_alg, default_A_view, no_properties, no_reset_data, true), + num_passed, num_skipped); + // Test index_base 1 + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, oneapi::mkl::index_base::one, + transpose_val, alpha, default_alg, default_A_view, no_properties, + no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test upper triangular matrix + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, upper_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test lower triangular unit diagonal matrix + oneapi::mkl::sparse::matrix_view triangular_unit_A_view( + oneapi::mkl::sparse::matrix_descr::triangular); + triangular_unit_A_view.diag_view = oneapi::mkl::diag::unit; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, triangular_unit_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test upper triangular unit diagonal matrix + triangular_unit_A_view.uplo_view = oneapi::mkl::uplo::upper; + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, triangular_unit_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test non-default alpha + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, + set_fp_value()(2.f, 1.5f), default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test int64 indices + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i64(dev, format, 15L, density_A_matrix, index_zero, + transpose_val, alpha, default_alg, default_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test lower no_optimize_alg 
+ EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + no_optimize_alg, default_A_view, no_properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test upper no_optimize_alg + EXPECT_TRUE_OR_FUTURE_SKIP(test_functor_i32(dev, format, m, density_A_matrix, index_zero, + transpose_val, alpha, no_optimize_alg, upper_A_view, + no_properties, no_reset_data, no_scalars_on_device), + num_passed, num_skipped); + // Test matrix properties + for (auto properties : test_matrix_properties) { + // Basic test with matrix properties + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + default_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + // Test lower no_optimize_alg with matrix properties + EXPECT_TRUE_OR_FUTURE_SKIP( + test_functor_i32(dev, format, m, density_A_matrix, index_zero, transpose_val, alpha, + no_optimize_alg, default_A_view, properties, no_reset_data, + no_scalars_on_device), + num_passed, num_skipped); + } +} + +/** + * Helper function to test multiple sparse matrix format. 
+ * + * @tparam fpType Complex or scalar, single or double precision type + * @tparam testFunctorI32 Test functor for fpType and int32 + * @tparam testFunctorI64 Test functor for fpType and int64 + * @param dev Device to test + * @param transpose_val Transpose value for the input matrix + * @param num_passed Increase the number of configurations passed + * @param num_skipped Increase the number of configurations skipped + */ +template +void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, + sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, + int &num_skipped) { + test_helper_with_format(test_functor_i32, test_functor_i64, dev, + sparse_matrix_format_t::CSR, transpose_val, num_passed, + num_skipped); + test_helper_with_format(test_functor_i32, test_functor_i64, dev, + sparse_matrix_format_t::COO, transpose_val, num_passed, + num_skipped); +} + +/// Compute spsv reference as a dense operation +template +void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType *ia, + const intType *ja, const fpType *a, intType m, intType nnz, + intType indexing, oneapi::mkl::transpose opA, const fpType *x, + fpType alpha, oneapi::mkl::sparse::matrix_view A_view, + fpType *y_ref) { + std::size_t mu = static_cast(m); + auto dense_opa = sparse_to_dense(format, ia, ja, a, mu, mu, static_cast(nnz), + indexing, opA, A_view); + + // + // do SPSV operation + // + // y_ref <- op(A)^-1 * x + // + // Compute each element of the reference one after the other starting from 0 (resp. the end) for a lower (resp. upper) triangular matrix. + // A matrix is considered lowered if it is lower and not transposed or upper and transposed. + const bool is_lower = + (A_view.uplo_view == oneapi::mkl::uplo::lower) == (opA == oneapi::mkl::transpose::nontrans); + for (std::size_t row = 0; row < mu; row++) { + std::size_t uplo_row = is_lower ? 
row : (mu - 1 - row); + fpType rhs = alpha * x[uplo_row]; + for (std::size_t col = 0; col < row; col++) { + std::size_t uplo_col = is_lower ? col : (mu - 1 - col); + rhs -= dense_opa[uplo_row * mu + uplo_col] * y_ref[uplo_col]; + } + y_ref[uplo_row] = rhs / dense_opa[uplo_row * mu + uplo_row]; + } +} + +#endif // _TEST_SPSV_HPP__ diff --git a/tests/unit_tests/sparse_blas/source/CMakeLists.txt b/tests/unit_tests/sparse_blas/source/CMakeLists.txt index 3a1fcb288..a9271bfa2 100644 --- a/tests/unit_tests/sparse_blas/source/CMakeLists.txt +++ b/tests/unit_tests/sparse_blas/source/CMakeLists.txt @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2023 Intel Corporation +# Copyright 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,12 +18,12 @@ #=============================================================================== set(SPBLAS_SOURCES - "sparse_gemm_buffer.cpp" - "sparse_gemm_usm.cpp" - "sparse_gemv_buffer.cpp" - "sparse_gemv_usm.cpp" - "sparse_trsv_buffer.cpp" - "sparse_trsv_usm.cpp" + "sparse_spmm_buffer.cpp" + "sparse_spmm_usm.cpp" + "sparse_spmv_buffer.cpp" + "sparse_spmv_usm.cpp" + "sparse_spsv_buffer.cpp" + "sparse_spsv_usm.cpp" ) include(WarningsUtils) diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp deleted file mode 100644 index 1c9549fcc..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemm_buffer.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows_A, intType ncols_A, intType ncols_C, - double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, - bool opt_1_input, bool opt_2_inputs) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? 
ncols_A : nrows_A); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows_A, ncols_A, density_A_matrix, - int_index, ia_host, ja_host, a_host); - - // Input and output dense vectors - std::vector b_host, c_host; - rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); - rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), - static_cast(ldc)); - std::vector c_ref_host(c_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows_A)); - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto b_buf = make_buffer(b_host); - auto c_buf = make_buffer(c_host); - - sycl::event ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows_A, ncols_A, nnz, - index, ia_buf, ja_buf, a_buf); - - if (opt_1_input) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, handle); - } - - if (opt_2_inputs) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, transpose_B, - dense_matrix_layout, static_cast(ncols_C), handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::gemm, main_queue, dense_matrix_layout, transpose_A, - transpose_B, alpha, handle, b_buf, ncols_C, ldb, beta, c_buf, ldc); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - 
catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemm_data(ia_host.data(), ja_host.data(), a_host.data(), nrows_A, ncols_A, - ncols_C, nnz, int_index, dense_matrix_layout, transpose_A, - transpose_B, alpha, beta, ldb, ldc, b_host.data(), - c_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - auto c_acc = c_buf.get_host_access(sycl::read_only); - bool valid = check_equal_vector(c_acc, c_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemmBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_A Transpose value for the A matrix - * @param transpose_B Transpose value for the B matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, int &num_passed, int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; - int nrows_A = 4, ncols_A = 6, ncols_C = 5; - int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; - int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; - bool no_opt_1_input = false; - bool opt_2_inputs = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, oneapi::mkl::index_base::one, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, set_fp_value()(2.f, 1.5f), fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, set_fp_value()(3.2f, 1.f), ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_one, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldb - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb + 5, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldc - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, 
index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc + 6, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test row major layout - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, ncols_C, - ncols_C, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test int64 indices - long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; - long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; - long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? long_nrows_A : long_ncols_A; - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, - index_zero, col_major, transpose_A, transpose_B, fp_one, - fp_zero, long_ldb, long_ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Use optimize_gemm with only the sparse gemm input - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, false), - num_passed, num_skipped); - // Use the 2 optimize_gemm versions - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, true), - num_passed, num_skipped); - // Do not use optimize_gemm - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, false, false), - num_passed, num_skipped); -} - -/** - * Helper function to test combination of transpose vals. - * Only test \p conjtrans if \p fpType is complex. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper_transpose(sycl::device *dev, int &num_passed, int &num_skipped) { - std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::trans }; - if (complex_info::is_complex) { - transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); - } - for (auto transpose_A : transpose_vals) { - for (auto transpose_B : transpose_vals) { - test_helper(dev, transpose_A, transpose_B, num_passed, num_skipped); - } - } -} - -TEST_P(SparseGemmBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemmBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemmBufferTestSuite, SparseGemmBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp deleted file mode 100644 index 3850f3b99..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemm_usm.cpp +++ /dev/null @@ -1,330 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows_A, intType ncols_A, intType ncols_C, - double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, - bool opt_1_input, bool opt_2_inputs) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? nrows_A : ncols_A); - std::size_t opa_ncols = static_cast( - transpose_A == oneapi::mkl::transpose::nontrans ? 
ncols_A : nrows_A); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows_A, ncols_A, density_A_matrix, - int_index, ia_host, ja_host, a_host); - - // Input and output dense vectors - std::vector b_host, c_host; - rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), - static_cast(ldb)); - rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), - static_cast(ldc)); - std::vector c_ref_host(c_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows_A)); - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto b_usm_uptr = malloc_device_uptr(main_queue, b_host.size()); - auto c_usm_uptr = malloc_device_uptr(main_queue, c_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *b_usm = b_usm_uptr.get(); - fpType *c_usm = c_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector gemm_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - gemm_dependencies.push_back( - main_queue.memcpy(b_usm, b_host.data(), b_host.size() * sizeof(fpType))); - gemm_dependencies.push_back( - main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - 
CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows_A, - ncols_A, nnz, index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (opt_1_input) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, - handle, { event }); - } - - if (opt_2_inputs) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemm, main_queue, transpose_A, - transpose_B, dense_matrix_layout, static_cast(ncols_C), - handle, { event }); - } - - gemm_dependencies.push_back(event); - CALL_RT_OR_CT(event = oneapi::mkl::sparse::gemm, main_queue, dense_matrix_layout, - transpose_A, transpose_B, alpha, handle, b_usm, ncols_C, ldb, beta, c_usm, - ldc, gemm_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemm_data(ia_host.data(), ja_host.data(), a_host.data(), nrows_A, ncols_A, - ncols_C, nnz, int_index, dense_matrix_layout, transpose_A, - transpose_B, alpha, beta, ldb, ldc, b_host.data(), - c_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. 
- ev_copy.wait_and_throw(); - bool valid = check_equal_vector(c_host, c_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemmUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_A Transpose value for the A matrix - * @param transpose_B Transpose value for the B matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_A, - oneapi::mkl::transpose transpose_B, int &num_passed, int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::layout col_major = oneapi::mkl::layout::col_major; - int nrows_A = 4, ncols_A = 6, ncols_C = 5; - int ldb = transpose_A == oneapi::mkl::transpose::nontrans ? ncols_A : nrows_A; - int ldc = transpose_A == oneapi::mkl::transpose::nontrans ? 
nrows_A : ncols_A; - bool no_opt_1_input = false; - bool opt_2_inputs = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, oneapi::mkl::index_base::one, - col_major, transpose_A, transpose_B, fp_one, fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, set_fp_value()(2.f, 1.5f), fp_zero, ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, set_fp_value()(3.2f, 1.f), ldb, ldc, no_opt_1_input, - opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_one, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_zero, fp_zero, ldb, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldb - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb + 5, ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test non-default ldc - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, 
index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc + 6, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test row major layout - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, - oneapi::mkl::layout::row_major, transpose_A, transpose_B, fp_one, fp_zero, ncols_C, - ncols_C, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Test int64 indices - long long_nrows_A = 27, long_ncols_A = 13, long_ncols_C = 6; - long long_ldb = transpose_A == oneapi::mkl::transpose::nontrans ? long_ncols_A : long_nrows_A; - long long_ldc = transpose_A == oneapi::mkl::transpose::nontrans ? long_nrows_A : long_ncols_A; - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, long_nrows_A, long_ncols_A, long_ncols_C, density_A_matrix, - index_zero, col_major, transpose_A, transpose_B, fp_one, - fp_zero, long_ldb, long_ldc, no_opt_1_input, opt_2_inputs), - num_passed, num_skipped); - // Use optimize_gemm with only the sparse gemm input - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, false), - num_passed, num_skipped); - // Use the 2 optimize_gemm versions - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, true, true), - num_passed, num_skipped); - // Do not use optimize_gemm - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, nrows_A, ncols_A, ncols_C, density_A_matrix, index_zero, col_major, transpose_A, - transpose_B, fp_one, fp_zero, ldb, ldc, false, false), - num_passed, num_skipped); -} - -/** - * Helper function to test combination of transpose vals. - * Only test \p conjtrans if \p fpType is complex. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -auto test_helper_transpose(sycl::device *dev, int &num_passed, int &num_skipped) { - std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::trans }; - if (complex_info::is_complex) { - transpose_vals.push_back(oneapi::mkl::transpose::conjtrans); - } - for (auto transpose_A : transpose_vals) { - for (auto transpose_B : transpose_vals) { - test_helper(dev, transpose_A, transpose_B, num_passed, num_skipped); - } - } -} - -TEST_P(SparseGemmUsmTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemmUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemmUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper_transpose(GetParam(), num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemmUsmTestSuite, SparseGemmUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp deleted file mode 100644 index b95636831..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_buffer.cpp +++ /dev/null @@ -1,230 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, - oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, - fpType beta, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); - std::size_t opa_ncols = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, - ia_host, ja_host, a_host); - - // Input and output dense vectors - // The input `x` and the input-output `y` are both initialized to random values on host and device. 
- std::vector x_host, y_host; - rand_vector(x_host, opa_ncols); - rand_vector(y_host, opa_nrows); - std::vector y_ref_host(y_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto x_buf = make_buffer(x_host); - auto y_buf = make_buffer(y_host); - - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - sycl::event ev_release; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, nnz, - index, ia_buf, ja_buf, a_buf); - - if (use_optimize) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, x_buf, - beta, y_buf); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, - int_index, transpose_val, alpha, beta, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. 
- auto y_acc = y_buf.get_host_access(sycl::read_only); - bool valid = check_equal_vector(y_acc, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemvBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, - transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, - set_fp_value()(2.f, 1.5f), fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, - set_fp_value()(3.2f, 1.f), use_optimize), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one, use_optimize), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, 
transpose_val, fp_zero, - fp_zero, use_optimize), - num_passed, num_skipped); - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, - fp_one, fp_one, use_optimize), - num_passed, num_skipped); - // Test without optimize_gemv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, false), - num_passed, num_skipped); -} - -TEST_P(SparseGemvBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemvBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemvBufferTestSuite, SparseGemvBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp deleted file mode 100644 index 582e0c6f4..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_gemv_usm.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. 
-* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType nrows, intType ncols, double density_A_matrix, - oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, - fpType beta, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - std::size_t opa_nrows = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? nrows : ncols); - std::size_t opa_ncols = - static_cast(transpose_val == oneapi::mkl::transpose::nontrans ? ncols : nrows); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - intType nnz = generate_random_matrix(nrows, ncols, density_A_matrix, int_index, - ia_host, ja_host, a_host); - - // Input and output dense vectors - // The input `x` and the input-output `y` are both initialized to random values on host and device. 
- std::vector x_host, y_host; - rand_vector(x_host, opa_ncols); - rand_vector(y_host, opa_nrows); - std::vector y_ref_host(y_host); - - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), static_cast(nrows)); - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); - auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector gemv_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - gemv_dependencies.push_back( - main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); - gemv_dependencies.push_back( - main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, nrows, ncols, - nnz, index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (use_optimize) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_gemv, main_queue, transpose_val, - handle, { event }); - } - - gemv_dependencies.push_back(event); - CALL_RT_OR_CT(event = 
oneapi::mkl::sparse::gemv, main_queue, transpose_val, alpha, handle, - x_usm, beta, y_usm, gemv_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse GEMV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse GEMV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_gemv_data(ia_host.data(), ja_host.data(), a_host.data(), nrows, ncols, nnz, - int_index, transpose_val, alpha, beta, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - ev_copy.wait_and_throw(); - bool valid = check_equal_vector(y_host, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseGemvUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.8; - fpType fp_zero = set_fp_value()(0.f, 0.f); - fpType fp_one = set_fp_value()(1.f, 0.f); - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, oneapi::mkl::index_base::one, - transpose_val, fp_one, fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default alpha - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, - set_fp_value()(2.f, 1.5f), fp_zero, use_optimize), - num_passed, num_skipped); - // Test non-default beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, - set_fp_value()(3.2f, 1.f), use_optimize), - num_passed, num_skipped); - // Test 0 alpha - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, fp_one, use_optimize), - num_passed, num_skipped); - // Test 0 alpha and beta - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_zero, - fp_zero, use_optimize), - num_passed, num_skipped); - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 27L, 13L, density_A_matrix, index_zero, transpose_val, - fp_one, fp_one, use_optimize), - num_passed, num_skipped); - // Test without optimize_gemv - 
EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, 4, 6, density_A_matrix, index_zero, transpose_val, fp_one, fp_zero, false), - num_passed, num_skipped); -} - -TEST_P(SparseGemvUsmTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseGemvUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseGemvUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseGemvUsmTestSuite, SparseGemvUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp new file mode 100644 index 000000000..b6f9e1185 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -0,0 +1,248 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmm.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, + intType ldc, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + ncols_C = nrows_A; + ldb = nrows_A; + ldc = nrows_A; + } + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_A, nrows_A, ncols_A); + auto [opb_nrows, opb_ncols] = swap_if_transposed(transpose_B, opa_ncols, ncols_C); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + std::vector b_host, c_host; + rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), + static_cast(ldb), transpose_B); + rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), + static_cast(ldc)); + std::vector c_ref_host(c_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto b_buf = make_buffer(b_host); + auto c_buf = make_buffer(c_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr; + oneapi::mkl::sparse::spmm_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, + opb_ncols, ldb, dense_matrix_layout, b_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, + static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, + c_buf); + + 
CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_size); + sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha, + A_view, A_handle, B_handle, &beta, C_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), reset_nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + nnz = reset_nnz; + copy_host_to_buffer(main_queue, c_ref_host, c_buf); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_buf); + + 
CALL_RT_OR_CT(oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, &alpha, + A_view, A_handle, B_handle, &beta, C_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; + return 0; + } + CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmm_descr, main_queue, descr); + free_handles(main_queue, A_handle, B_handle, C_handle); + + // Compute reference. + prepare_reference_spmm_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, ncols_C, nnz, indexing, dense_matrix_layout, transpose_A, + transpose_B, alpha, beta, ldb, ldc, b_host.data(), A_view, + c_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto c_acc = c_buf.get_host_access(sycl::read_only); + bool valid = check_equal_vector(c_acc, c_ref_host); + + return static_cast(valid); +} + +class SparseSpmmBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmmBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmmBufferTestSuite, SparseSpmmBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp new file mode 100644 index 000000000..5778430a6 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -0,0 +1,296 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmm.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, + intType ldc, oneapi::mkl::sparse::spmm_alg alg, + oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + ncols_C = nrows_A; + ldb = nrows_A; + ldc = nrows_A; + } + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_A, nrows_A, ncols_A); + auto [opb_nrows, opb_ncols] = swap_if_transposed(transpose_B, opa_ncols, ncols_C); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + std::vector b_host, c_host; + rand_matrix(b_host, dense_matrix_layout, opa_ncols, static_cast(ncols_C), + static_cast(ldb), transpose_B); + rand_matrix(c_host, dense_matrix_layout, opa_nrows, static_cast(ncols_C), + static_cast(ldc)); + std::vector c_ref_host(c_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + auto b_usm_uptr = malloc_device_uptr(main_queue, b_host.size()); + auto c_usm_uptr = malloc_device_uptr(main_queue, c_host.size()); + auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); + auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *b_usm = b_usm_uptr.get(); + fpType *c_usm = c_usm_uptr.get(); + + std::vector mat_dependencies; + std::vector spmm_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + 
main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spmm_dependencies.push_back( + main_queue.memcpy(b_usm, b_host.data(), b_host.size() * sizeof(fpType))); + spmm_dependencies.push_back( + main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); + + fpType *alpha_host_or_usm_ptr = &alpha; + fpType *beta_host_or_usm_ptr = &beta; + if (test_scalar_on_device) { + spmm_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + spmm_dependencies.push_back(main_queue.memcpy(beta_usm_uptr.get(), &beta, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + beta_host_or_usm_ptr = beta_usm_uptr.get(); + } + + sycl::event ev_copy, ev_spmm; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t B_handle = nullptr; + oneapi::mkl::sparse::dense_matrix_handle_t C_handle = nullptr; + oneapi::mkl::sparse::spmm_descr_t descr = nullptr; + std::unique_ptr workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &B_handle, opb_nrows, + opb_ncols, ldb, dense_matrix_layout, b_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_matrix, main_queue, &C_handle, + static_cast(opa_nrows), ncols_C, ldc, dense_matrix_layout, + c_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmm_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = 
oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + spmm_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + spmm_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), reset_nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spmm.wait_and_throw(); + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spmm)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spmm)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spmm)); + mat_dependencies.push_back( + main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType), ev_spmm)); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmm_buffer_size, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = 
malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmm_optimize, main_queue, transpose_A, + transpose_B, &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, + descr, workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spmm = oneapi::mkl::sparse::spmm, main_queue, transpose_A, transpose_B, + &alpha, A_view, A_handle, B_handle, &beta, C_handle, alg, descr, + { ev_opt }); + } + + ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), ev_spmm); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmm_descr, main_queue, descr, + { ev_spmm }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spmm }, A_handle, B_handle, C_handle); + + // Compute reference. + prepare_reference_spmm_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, ncols_C, nnz, indexing, dense_matrix_layout, transpose_A, + transpose_B, alpha, beta, ldb, ldc, b_host.data(), A_view, + c_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. 
+ ev_copy.wait_and_throw(); + bool valid = check_equal_vector(c_host, c_ref_host); + + return static_cast(valid); +} + +class SparseSpmmUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmmUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmmUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmm, test_spmm, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmmUsmTestSuite, SparseSpmmUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp new file mode 100644 index 000000000..3d99f9e94 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -0,0 +1,236 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + } + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_val, nrows_A, ncols_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + // The input `x` and the input-output `y` are both initialized to random values on host and device. 
+ std::vector x_host, y_host; + rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast(nrows_A)); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto x_buf = make_buffer(x_host); + auto y_buf = make_buffer(y_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, + static_cast(x_host.size()), x_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, + static_cast(y_host.size()), y_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); + sycl::buffer workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, 
ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), reset_nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + copy_host_to_buffer(main_queue, y_ref_host, y_buf); + nnz = reset_nnz; + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_buf, + ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, + workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, &beta, y_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; + return 0; + } + 
CALL_RT_OR_CT(oneapi::mkl::sparse::release_spmv_descr, main_queue, descr); + free_handles(main_queue, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spmv_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, nnz, indexing, transpose_val, alpha, beta, x_host.data(), + A_view, y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto y_acc = y_buf.get_host_access(sycl::read_only); + bool valid = check_equal_vector(y_acc, y_ref_host); + + return static_cast(valid); +} + +class SparseSpmvBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmvBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmvBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmvBufferTestSuite, SparseSpmvBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp new file mode 100644 index 000000000..ded92a770 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -0,0 +1,287 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spmv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, + double density_A_matrix, oneapi::mkl::index_base index, + oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, + oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + sycl::queue main_queue(*dev, exception_handler_t()); + + if (require_square_matrix(A_view, matrix_properties)) { + ncols_A = nrows_A; + } + auto [opa_nrows, opa_ncols] = swap_if_transposed(transpose_val, nrows_A, ncols_A); + intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + intType nnz = + generate_random_matrix(format, nrows_A, ncols_A, density_A_matrix, + indexing, ia_host, ja_host, a_host, is_symmetric); + + // Input and output dense vectors + // The input `x` and the input-output `y` are both initialized to random values on host and device. 
+ std::vector<fpType> x_host, y_host; + rand_vector(x_host, opa_ncols); + rand_vector(y_host, opa_nrows); + std::vector<fpType> y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + static_cast<std::size_t>(nrows_A)); + } + + auto ia_usm_uptr = malloc_device_uptr<intType>(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr<intType>(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr<fpType>(main_queue, a_host.size()); + auto x_usm_uptr = malloc_device_uptr<fpType>(main_queue, x_host.size()); + auto y_usm_uptr = malloc_device_uptr<fpType>(main_queue, y_host.size()); + auto alpha_usm_uptr = malloc_device_uptr<fpType>(main_queue, 1); + auto beta_usm_uptr = malloc_device_uptr<fpType>(main_queue, 1); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *x_usm = x_usm_uptr.get(); + fpType *y_usm = y_usm_uptr.get(); + + std::vector<sycl::event> mat_dependencies; + std::vector<sycl::event> spmv_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spmv_dependencies.push_back( + main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); + spmv_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + + fpType *alpha_host_or_usm_ptr = &alpha; + fpType *beta_host_or_usm_ptr = &beta; + if (test_scalar_on_device) { + spmv_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + spmv_dependencies.push_back(main_queue.memcpy(beta_usm_uptr.get(), &beta, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + beta_host_or_usm_ptr =
beta_usm_uptr.get(); + } + + sycl::event ev_copy, ev_spmv; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spmv_descr_t descr = nullptr; + std::unique_ptr workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, + static_cast(x_host.size()), x_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, + static_cast(y_host.size()), y_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spmv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_usm.get(), mat_dependencies); + + spmv_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, spmv_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, nrows_A, ncols_A, density_A_matrix, indexing, ia_host, ja_host, a_host, + is_symmetric); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + 
a_host.data(), reset_nnz, static_cast(nrows_A)); + } + if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spmv.wait_and_throw(); + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spmv)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spmv)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spmv)); + mat_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType), ev_spmv)); + set_matrix_data(main_queue, format, A_handle, nrows_A, ncols_A, nnz, index, ia_usm, + ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spmv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spmv_optimize, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spmv = oneapi::mkl::sparse::spmv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, beta_host_or_usm_ptr, + y_handle, alg, descr, { ev_opt }); + } + + ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spmv); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous 
SYCL exception during sparse SPMV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spmv_descr, main_queue, descr, + { ev_spmv }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spmv }, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spmv_data(format, ia_host.data(), ja_host.data(), a_host.data(), nrows_A, + ncols_A, nnz, indexing, transpose_val, alpha, beta, x_host.data(), + A_view, y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(y_host, y_ref_host); + + return static_cast(valid); +} + +class SparseSpmvUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpmvUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpmvUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spmv, test_spmv, GetParam(), + num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpmvUsmTestSuite, SparseSpmvUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp new file mode 100644 index 000000000..6b276dff4 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -0,0 +1,246 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spsv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + if (test_scalar_on_device) { + // Scalars on the device is not planned to be supported with the buffer API + return 1; + } + sycl::queue main_queue(*dev, exception_handler_t()); + + intType indexing = (index == oneapi::mkl::index_base::zero) ? 0 : 1; + const std::size_t mu = static_cast(m); + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix. 
+ const bool require_diagonal = + !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal && + A_view.diag_view == oneapi::mkl::diag::unit); + intType nnz = + generate_random_matrix(format, m, m, density_A_matrix, indexing, ia_host, + ja_host, a_host, is_symmetric, require_diagonal); + + // Input dense vector. + // The input `x` is initialized to random values on host and device. + std::vector x_host; + rand_vector(x_host, mu); + + // Output and reference dense vectors. + // They are both initialized with a dummy value to catch more errors. + std::vector y_host(mu, -2.0f); + std::vector y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + mu); + } + + auto ia_buf = make_buffer(ia_host); + auto ja_buf = make_buffer(ja_host); + auto a_buf = make_buffer(a_host); + auto x_buf = make_buffer(x_host); + auto y_buf = make_buffer(y_host); + + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spsv_descr_t descr = nullptr; + try { + init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_buf); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size); + sycl::buffer 
workspace_buf((sycl::range<1>(workspace_size))); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, m, m, density_A_matrix, indexing, ia_host, ja_host, a_host, is_symmetric, + require_diagonal); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), reset_nnz, mu); + } + if (reset_nnz > nnz) { + ia_buf = make_buffer(ia_host); + ja_buf = make_buffer(ja_host); + a_buf = make_buffer(a_host); + } + else { + copy_host_to_buffer(main_queue, ia_host, ia_buf); + copy_host_to_buffer(main_queue, ja_host, ja_buf); + copy_host_to_buffer(main_queue, a_host, a_buf); + } + copy_host_to_buffer(main_queue, y_ref_host, y_buf); + nnz = reset_nnz; + set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_buf, ja_buf, a_buf); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_buf = sycl::buffer((sycl::range<1>(workspace_size_2))); + } + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, &alpha, + A_view, A_handle, x_handle, y_handle, alg, descr, workspace_buf); + + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv, main_queue, transpose_val, &alpha, A_view, + A_handle, x_handle, y_handle, alg, descr); + } + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, 
y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; + return 0; + } + CALL_RT_OR_CT(oneapi::mkl::sparse::release_spsv_descr, main_queue, descr); + free_handles(main_queue, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spsv_data(format, ia_host.data(), ja_host.data(), a_host.data(), m, nnz, + indexing, transpose_val, x_host.data(), alpha, A_view, + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + auto y_acc = y_buf.get_host_access(sycl::read_only); + bool valid = check_equal_vector(y_acc, y_ref_host); + + return static_cast(valid); +} + +class SparseSpsvBufferTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpsvBufferTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvBufferTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpsvBufferTestSuite, SparseSpsvBufferTests, + testing::ValuesIn(devices), ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp new file mode 100644 index 000000000..3b58db914 --- /dev/null +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -0,0 +1,294 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include +#include +#include + +#include "test_spsv.hpp" + +extern std::vector devices; + +namespace { + +template +int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, + oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, + oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, + const std::set &matrix_properties, + bool reset_data, bool test_scalar_on_device) { + sycl::queue main_queue(*dev, exception_handler_t()); + + intType indexing = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; + const std::size_t mu = static_cast(m); + const bool is_sorted = matrix_properties.find(oneapi::mkl::sparse::matrix_property::sorted) != + matrix_properties.cend(); + const bool is_symmetric = + matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != + matrix_properties.cend(); + + // Input matrix + std::vector ia_host, ja_host; + std::vector a_host; + // Set non-zero values to the diagonal, except if the matrix is viewed as a unit matrix. + const bool require_diagonal = + !(A_view.type_view == oneapi::mkl::sparse::matrix_descr::diagonal && + A_view.diag_view == oneapi::mkl::diag::unit); + intType nnz = + generate_random_matrix(format, m, m, density_A_matrix, indexing, ia_host, + ja_host, a_host, is_symmetric, require_diagonal); + + // Input dense vector. + // The input `x` is initialized to random values on host and device. + std::vector x_host; + rand_vector(x_host, mu); + + // Output and reference dense vectors. + // They are both initialized with a dummy value to catch more errors. 
+ std::vector<fpType> y_host(mu, -2.0f); + std::vector<fpType> y_ref_host(y_host); + + // Shuffle ordering of column indices/values to test sortedness + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), a_host.data(), nnz, + mu); + } + + auto ia_usm_uptr = malloc_device_uptr<intType>(main_queue, ia_host.size()); + auto ja_usm_uptr = malloc_device_uptr<intType>(main_queue, ja_host.size()); + auto a_usm_uptr = malloc_device_uptr<fpType>(main_queue, a_host.size()); + auto x_usm_uptr = malloc_device_uptr<fpType>(main_queue, x_host.size()); + auto y_usm_uptr = malloc_device_uptr<fpType>(main_queue, y_host.size()); + auto alpha_usm_uptr = malloc_device_uptr<fpType>(main_queue, 1); + + intType *ia_usm = ia_usm_uptr.get(); + intType *ja_usm = ja_usm_uptr.get(); + fpType *a_usm = a_usm_uptr.get(); + fpType *x_usm = x_usm_uptr.get(); + fpType *y_usm = y_usm_uptr.get(); + + std::vector<sycl::event> mat_dependencies; + std::vector<sycl::event> spsv_dependencies; + // Copy host to device + mat_dependencies.push_back( + main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); + spsv_dependencies.push_back( + main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); + spsv_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); + + fpType *alpha_host_or_usm_ptr = &alpha; + if (test_scalar_on_device) { + spsv_dependencies.push_back( + main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); + alpha_host_or_usm_ptr = alpha_usm_uptr.get(); + } + + sycl::event ev_copy, ev_spsv; + oneapi::mkl::sparse::matrix_handle_t A_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t x_handle = nullptr; + oneapi::mkl::sparse::dense_vector_handle_t y_handle = nullptr; + oneapi::mkl::sparse::spsv_descr_t descr = nullptr; + std::unique_ptr
workspace_usm(nullptr, UsmDeleter(main_queue)); + try { + init_sparse_matrix(main_queue, format, &A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); + for (auto property : matrix_properties) { + CALL_RT_OR_CT(oneapi::mkl::sparse::set_matrix_property, main_queue, A_handle, property); + } + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &x_handle, m, x_usm); + CALL_RT_OR_CT(oneapi::mkl::sparse::init_dense_vector, main_queue, &y_handle, m, y_usm); + + CALL_RT_OR_CT(oneapi::mkl::sparse::init_spsv_descr, main_queue, &descr); + + std::size_t workspace_size = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_size); + workspace_usm = malloc_device_uptr(main_queue, workspace_size); + + sycl::event ev_opt; + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + spsv_dependencies.push_back(ev_opt); + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + spsv_dependencies); + + if (reset_data) { + intType reset_nnz = generate_random_matrix( + format, m, m, density_A_matrix, indexing, ia_host, ja_host, a_host, is_symmetric, + require_diagonal); + if (!is_sorted) { + shuffle_sparse_matrix(format, indexing, ia_host.data(), ja_host.data(), + a_host.data(), reset_nnz, mu); + } + if (reset_nnz > nnz) { + // Wait before freeing usm pointers + ev_spsv.wait_and_throw(); + ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); + ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); + a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); + ia_usm = ia_usm_uptr.get(); + ja_usm = ja_usm_uptr.get(); + a_usm = a_usm_uptr.get(); + } + nnz = reset_nnz; + + mat_dependencies.clear(); + 
mat_dependencies.push_back(main_queue.memcpy( + ia_usm, ia_host.data(), ia_host.size() * sizeof(intType), ev_spsv)); + mat_dependencies.push_back(main_queue.memcpy( + ja_usm, ja_host.data(), ja_host.size() * sizeof(intType), ev_spsv)); + mat_dependencies.push_back( + main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType), ev_spsv)); + mat_dependencies.push_back( + main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType), ev_spsv)); + set_matrix_data(main_queue, format, A_handle, m, m, nnz, index, ia_usm, ja_usm, a_usm); + + std::size_t workspace_size_2 = 0; + CALL_RT_OR_CT(oneapi::mkl::sparse::spsv_buffer_size, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_size_2); + if (workspace_size_2 > workspace_size) { + workspace_usm = malloc_device_uptr(main_queue, workspace_size_2); + } + + CALL_RT_OR_CT(ev_opt = oneapi::mkl::sparse::spsv_optimize, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + workspace_usm.get(), mat_dependencies); + + CALL_RT_OR_CT(ev_spsv = oneapi::mkl::sparse::spsv, main_queue, transpose_val, + alpha_host_or_usm_ptr, A_view, A_handle, x_handle, y_handle, alg, descr, + { ev_opt }); + } + + ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spsv); + } + catch (const sycl::exception &e) { + std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" + << e.what() << std::endl; + print_error_code(e); + return 0; + } + catch (const oneapi::mkl::unimplemented &e) { + wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); + if (descr) { + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, + descr); + ev_release_descr.wait(); + } + return test_skipped; + } + catch (const std::runtime_error &error) { + std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << 
std::endl; + return 0; + } + sycl::event ev_release_descr; + CALL_RT_OR_CT(ev_release_descr = oneapi::mkl::sparse::release_spsv_descr, main_queue, descr, + { ev_spsv }); + ev_release_descr.wait_and_throw(); + free_handles(main_queue, { ev_spsv }, A_handle, x_handle, y_handle); + + // Compute reference. + prepare_reference_spsv_data(format, ia_host.data(), ja_host.data(), a_host.data(), m, nnz, + indexing, transpose_val, x_host.data(), alpha, A_view, + y_ref_host.data()); + + // Compare the results of reference implementation and DPC++ implementation. + ev_copy.wait_and_throw(); + bool valid = check_equal_vector(y_host, y_ref_host); + + return static_cast(valid); +} + +class SparseSpsvUsmTests : public ::testing::TestWithParam {}; + +TEST_P(SparseSpsvUsmTests, RealSinglePrecision) { + using fpType = float; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, RealDoublePrecision) { + using fpType = double; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, ComplexSinglePrecision) { + using fpType = std::complex; + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." << std::endl; + } +} + +TEST_P(SparseSpsvUsmTests, ComplexDoublePrecision) { + using fpType = std::complex; + CHECK_DOUBLE_ON_DEVICE(GetParam()); + int num_passed = 0, num_skipped = 0; + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::nontrans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::trans, num_passed, num_skipped); + test_helper(test_spsv, test_spsv, GetParam(), + oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); + if (num_skipped > 0) { + // Mark that some tests were skipped + GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped + << " configurations." 
<< std::endl; + } +} + +INSTANTIATE_TEST_SUITE_P(SparseSpsvUsmTestSuite, SparseSpsvUsmTests, testing::ValuesIn(devices), + ::DeviceNamePrint()); + +} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp deleted file mode 100644 index 4e82ae1f0..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_trsv_buffer.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType m, double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 
0 : 1; - const std::size_t mu = static_cast(m); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - // Always require values to be present in the diagonal of the sparse matrix. - // The values set in the matrix don't need to be 1s even if diag_val is unit. - const bool require_diagonal = true; - intType nnz = generate_random_matrix( - m, m, density_A_matrix, int_index, ia_host, ja_host, a_host, require_diagonal); - - // Input dense vector. - // The input `x` is initialized to random values on host and device. - std::vector x_host; - rand_vector(x_host, mu); - - // Output and reference dense vectors. - // They are both initialized with a dummy value to catch more errors. - std::vector y_host(mu, -2.0f); - std::vector y_ref_host(y_host); - - // Intel oneMKL does not support unsorted data if - // `sparse::optimize_trsv()` is not called first. - if (use_optimize) { - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), mu); - } - - auto ia_buf = make_buffer(ia_host); - auto ja_buf = make_buffer(ja_host); - auto a_buf = make_buffer(a_host); - auto x_buf = make_buffer(x_host); - auto y_buf = make_buffer(y_host); - - sycl::event ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(oneapi::mkl::sparse::set_csr_data, main_queue, handle, m, m, nnz, index, - ia_buf, ja_buf, a_buf); - - if (use_optimize) { - CALL_RT_OR_CT(oneapi::mkl::sparse::optimize_trsv, main_queue, uplo_val, transpose_val, - diag_val, handle); - } - - CALL_RT_OR_CT(oneapi::mkl::sparse::trsv, main_queue, uplo_val, transpose_val, diag_val, - handle, x_buf, y_buf); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse TRSV:\n" - << 
e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse TRSV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_trsv_data(ia_host.data(), ja_host.data(), a_host.data(), m, int_index, - uplo_val, transpose_val, diag_val, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - auto y_acc = y_buf.get_host_access(sycl::read_only); - bool valid = check_equal_vector(y_acc, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseTrsvBufferTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. - * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - * @param num_passed Increase the number of configurations passed - * @param num_skipped Increase the number of configurations skipped - */ -template -auto test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.144; - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::uplo lower = oneapi::mkl::uplo::lower; - oneapi::mkl::diag nonunit = oneapi::mkl::diag::nonunit; - int m = 277; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, oneapi::mkl::index_base::one, - lower, transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test upper triangular matrix - 
EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, use_optimize), - num_passed, num_skipped); - // Test unit diagonal matrix - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, oneapi::mkl::diag::unit, use_optimize), - num_passed, num_skipped); - // Temporarily disable trsv using long indices on GPU - if (!dev->is_gpu()) { - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 15L, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - } - // Test lower without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, lower, transpose_val, nonunit, false), - num_passed, num_skipped); - // Test upper without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, false), - num_passed, num_skipped); -} - -TEST_P(SparseTrsvBufferTests, RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvBufferTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseTrsvBufferTestSuite, SparseTrsvBufferTests, - testing::ValuesIn(devices), ::DeviceNamePrint()); - -} // anonymous namespace diff --git a/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp deleted file mode 100644 index 8292395fb..000000000 --- a/tests/unit_tests/sparse_blas/source/sparse_trsv_usm.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. 
-* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions -* and limitations under the License. -* -* -* SPDX-License-Identifier: Apache-2.0 -*******************************************************************************/ - -#include -#include -#include - -#if __has_include() -#include -#else -#include -#endif - -#include "oneapi/mkl.hpp" -#include "oneapi/mkl/detail/config.hpp" -#include "sparse_reference.hpp" -#include "test_common.hpp" -#include "test_helper.hpp" - -#include - -extern std::vector devices; - -namespace { - -template -int test(sycl::device *dev, intType m, double density_A_matrix, oneapi::mkl::index_base index, - oneapi::mkl::uplo uplo_val, oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, bool use_optimize) { - sycl::queue main_queue(*dev, exception_handler_t()); - - intType int_index = (index == oneapi::mkl::index_base::zero) ? 0 : 1; - const std::size_t mu = static_cast(m); - - // Input matrix in CSR format - std::vector ia_host, ja_host; - std::vector a_host; - const bool require_diagonal = diag_val == oneapi::mkl::diag::nonunit; - intType nnz = generate_random_matrix( - m, m, density_A_matrix, int_index, ia_host, ja_host, a_host, require_diagonal); - - // Input dense vector. - // The input `x` is initialized to random values on host and device. - std::vector x_host; - rand_vector(x_host, mu); - - // Output and reference dense vectors. - // They are both initialized with a dummy value to catch more errors. - std::vector y_host(mu, -2.0f); - std::vector y_ref_host(y_host); - - // Intel oneMKL does not support unsorted data if - // `sparse::optimize_trsv()` is not called first. 
- if (use_optimize) { - // Shuffle ordering of column indices/values to test sortedness - shuffle_data(ia_host.data(), ja_host.data(), a_host.data(), mu); - } - - auto ia_usm_uptr = malloc_device_uptr(main_queue, ia_host.size()); - auto ja_usm_uptr = malloc_device_uptr(main_queue, ja_host.size()); - auto a_usm_uptr = malloc_device_uptr(main_queue, a_host.size()); - auto x_usm_uptr = malloc_device_uptr(main_queue, x_host.size()); - auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); - - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); - - std::vector mat_dependencies; - std::vector trsv_dependencies; - // Copy host to device - mat_dependencies.push_back( - main_queue.memcpy(ia_usm, ia_host.data(), ia_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(ja_usm, ja_host.data(), ja_host.size() * sizeof(intType))); - mat_dependencies.push_back( - main_queue.memcpy(a_usm, a_host.data(), a_host.size() * sizeof(fpType))); - trsv_dependencies.push_back( - main_queue.memcpy(x_usm, x_host.data(), x_host.size() * sizeof(fpType))); - trsv_dependencies.push_back( - main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - - sycl::event ev_copy, ev_release; - oneapi::mkl::sparse::matrix_handle_t handle = nullptr; - try { - sycl::event event; - CALL_RT_OR_CT(oneapi::mkl::sparse::init_matrix_handle, main_queue, &handle); - - CALL_RT_OR_CT(event = oneapi::mkl::sparse::set_csr_data, main_queue, handle, m, m, nnz, - index, ia_usm, ja_usm, a_usm, mat_dependencies); - - if (use_optimize) { - CALL_RT_OR_CT(event = oneapi::mkl::sparse::optimize_trsv, main_queue, uplo_val, - transpose_val, diag_val, handle, { event }); - } - - trsv_dependencies.push_back(event); - CALL_RT_OR_CT(event = oneapi::mkl::sparse::trsv, main_queue, uplo_val, transpose_val, - diag_val, handle, x_usm, y_usm, 
trsv_dependencies); - - CALL_RT_OR_CT(ev_release = oneapi::mkl::sparse::release_matrix_handle, main_queue, &handle, - { event }); - - ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), event); - } - catch (const sycl::exception &e) { - std::cout << "Caught synchronous SYCL exception during sparse TRSV:\n" - << e.what() << std::endl; - print_error_code(e); - return 0; - } - catch (const oneapi::mkl::unimplemented &e) { - wait_and_free(main_queue, &handle); - return test_skipped; - } - catch (const std::runtime_error &error) { - std::cout << "Error raised during execution of sparse TRSV:\n" << error.what() << std::endl; - return 0; - } - - // Compute reference. - prepare_reference_trsv_data(ia_host.data(), ja_host.data(), a_host.data(), m, int_index, - uplo_val, transpose_val, diag_val, x_host.data(), - y_ref_host.data()); - - // Compare the results of reference implementation and DPC++ implementation. - ev_copy.wait_and_throw(); - bool valid = check_equal_vector(y_host, y_ref_host); - - ev_release.wait_and_throw(); - return static_cast(valid); -} - -class SparseTrsvUsmTests : public ::testing::TestWithParam {}; - -/** - * Helper function to run tests in different configuration. 
- * - * @tparam fpType Complex or scalar, single or double precision type - * @param dev Device to test - * @param transpose_val Transpose value for the input matrix - */ -template -void test_helper(sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { - double density_A_matrix = 0.144; - oneapi::mkl::index_base index_zero = oneapi::mkl::index_base::zero; - oneapi::mkl::uplo lower = oneapi::mkl::uplo::lower; - oneapi::mkl::diag nonunit = oneapi::mkl::diag::nonunit; - int m = 277; - bool use_optimize = true; - - // Basic test - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test index_base 1 - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, oneapi::mkl::index_base::one, - lower, transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - // Test upper triangular matrix - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, use_optimize), - num_passed, num_skipped); - // Test unit diagonal matrix - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, m, density_A_matrix, index_zero, lower, - transpose_val, oneapi::mkl::diag::unit, use_optimize), - num_passed, num_skipped); - // Temporarily disable trsv using long indices on GPU - if (!dev->is_gpu()) { - // Test int64 indices - EXPECT_TRUE_OR_FUTURE_SKIP(test(dev, 15L, density_A_matrix, index_zero, lower, - transpose_val, nonunit, use_optimize), - num_passed, num_skipped); - } - // Test lower without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, lower, transpose_val, nonunit, false), - num_passed, num_skipped); - // Test upper without optimize_trsv - EXPECT_TRUE_OR_FUTURE_SKIP( - test(dev, m, density_A_matrix, index_zero, oneapi::mkl::uplo::upper, transpose_val, - nonunit, false), - num_passed, num_skipped); -} - -TEST_P(SparseTrsvUsmTests, 
RealSinglePrecision) { - using fpType = float; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, RealDoublePrecision) { - using fpType = double; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, ComplexSinglePrecision) { - using fpType = std::complex; - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." 
<< std::endl; - } -} - -TEST_P(SparseTrsvUsmTests, ComplexDoublePrecision) { - using fpType = std::complex; - CHECK_DOUBLE_ON_DEVICE(GetParam()); - int num_passed = 0, num_skipped = 0; - test_helper(GetParam(), oneapi::mkl::transpose::nontrans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::trans, num_passed, num_skipped); - test_helper(GetParam(), oneapi::mkl::transpose::conjtrans, num_passed, num_skipped); - if (num_skipped > 0) { - // Mark that some tests were skipped - GTEST_SKIP() << "Passed: " << num_passed << ", Skipped: " << num_skipped - << " configurations." << std::endl; - } -} - -INSTANTIATE_TEST_SUITE_P(SparseTrsvUsmTestSuite, SparseTrsvUsmTests, testing::ValuesIn(devices), - ::DeviceNamePrint()); - -} // anonymous namespace